From e9bcad0139e9d64edd28694f6e2a428d54a635b9 Mon Sep 17 00:00:00 2001 From: danielp Date: Thu, 9 Oct 2025 16:06:17 +0100 Subject: [PATCH 01/14] Model update to match the schema 3.0 plan --- .../metadata/api/models/assembly.py | 64 +++++++++++------- .../production/metadata/api/models/dataset.py | 32 ++------- .../production/metadata/api/models/genome.py | 66 +++++++++++++++---- .../metadata/api/models/organism.py | 27 ++++---- .../production/metadata/api/models/release.py | 12 ++-- 5 files changed, 119 insertions(+), 82 deletions(-) diff --git a/src/ensembl/production/metadata/api/models/assembly.py b/src/ensembl/production/metadata/api/models/assembly.py index 99929c37..cb6b2421 100644 --- a/src/ensembl/production/metadata/api/models/assembly.py +++ b/src/ensembl/production/metadata/api/models/assembly.py @@ -17,30 +17,29 @@ from ensembl.production.metadata.api.models.base import Base, LoadAble -__all__ = ['Assembly', 'AssemblySequence'] +__all__ = ["Assembly", "AssemblySequence", "SequenceAlias"] class Assembly(LoadAble, Base): - __tablename__ = 'assembly' + __tablename__ = "assembly" assembly_id = Column(Integer, primary_key=True) - assembly_uuid = Column(String(32), unique=True, nullable=False, default=uuid.uuid4) + assembly_uuid = Column(String(40), unique=True, nullable=False, default=uuid.uuid4) ucsc_name = Column(String(16)) accession = Column(String(16), nullable=False, unique=True) level = Column(String(32), nullable=False) name = Column(String(128), nullable=False) accession_body = Column(String(32)) assembly_default = Column(String(128)) - tol_id = Column(String(32), unique=True) + tol_id = Column(String(32)) created = Column(DateTime) ensembl_name = Column(String(255), unique=True) - alt_accession = Column(String(16), nullable=True) is_reference = Column(TINYINT(1), nullable=False, default=0) - url_name = Column(String(128), nullable=False) # One to many relationships # assembly_id within assembly_sequence - assembly_sequences = 
relationship("AssemblySequence", back_populates="assembly", - cascade="all, delete, delete-orphan") + assembly_sequences = relationship( + "AssemblySequence", back_populates="assembly", cascade="all, delete, delete-orphan" + ) # assembly_id within genome genomes = relationship("Genome", back_populates="assembly", cascade="all, delete, delete-orphan") @@ -52,32 +51,51 @@ def is_released(self): class AssemblySequence(LoadAble, Base): - __tablename__ = 'assembly_sequence' + __tablename__ = "assembly_sequence" __table_args__ = ( - Index('assembly_sequence_assembly_id_accession_5f3e5119_uniq', 'assembly_id', 'accession', unique=True), + Index( + "assembly_sequence_assembly_id_accession_5f3e5119_uniq", "assembly_id", "accession", unique=True + ), ) assembly_sequence_id = Column(Integer, primary_key=True) - name = Column(String(128), unique=True) - assembly_id = Column(ForeignKey('assembly.assembly_id'), nullable=False, index=True) + name = Column(String(128)) + assembly_id = Column(ForeignKey("assembly.assembly_id"), nullable=False, index=True) accession = Column(String(128), nullable=False) chromosomal = Column(TINYINT(1), nullable=False, default=0) chromosome_rank = Column(Integer) length = Column(Integer, nullable=False) sequence_location = Column(String(10)) md5 = Column(String(32)) - # column need renaming as well sha512t24u = Column(String(128)) - type = Column(Enum('chromosome_group', 'plasmid', 'primary_assembly', 'contig', 'chromosome', 'scaffold', 'lrg', - 'supercontig', 'supscaffold'), server_default=text("'primary_assembly'")) + type = Column( + Enum( + "chromosome_group", + "plasmid", + "primary_assembly", + "contig", + "chromosome", + "scaffold", + "lrg", + "supercontig", + "supscaffold", + "non_ref_scaffold", + ), + server_default=text("'primary_assembly'"), + nullable=False, + ) is_circular = Column(TINYINT(1), nullable=False, default=0) - assembly = relationship('Assembly', back_populates="assembly_sequences") + additional = Column(TINYINT(1), 
nullable=False, default=0) + source = Column(String(128)) + assembly = relationship("Assembly", back_populates="assembly_sequences") + + +class SequenceAlias(LoadAble, Base): + __tablename__ = "sequence_alias" - # backward compatibility with old column name sha512t2u - @property - def sha512t4u(self): - return self.sha512t24u + sequence_alias_id = Column(Integer, primary_key=True) + assembly_sequence_id = Column(ForeignKey("assembly_sequence.assembly_sequence_id"), nullable=False) + alias = Column(String(128), nullable=False) + source = Column(String(128)) - @sha512t4u.setter - def sha512t4u(self, checksum): - self.sha512t24u = checksum + assembly_sequence = relationship("AssemblySequence", back_populates="sequence_alias") diff --git a/src/ensembl/production/metadata/api/models/dataset.py b/src/ensembl/production/metadata/api/models/dataset.py index 5303092e..186958f5 100644 --- a/src/ensembl/production/metadata/api/models/dataset.py +++ b/src/ensembl/production/metadata/api/models/dataset.py @@ -15,7 +15,7 @@ import uuid import sqlalchemy -from sqlalchemy import Column, Integer, String, text, ForeignKey, Index, JSON +from sqlalchemy import Column, Integer, String, text, ForeignKey, Index from sqlalchemy.dialects.mysql import DATETIME, TINYINT from sqlalchemy.orm import relationship, backref from sqlalchemy.sql import func @@ -34,6 +34,7 @@ class DatasetStatus(enum.Enum): PROCESSED = "Processed" RELEASED = "Released" FAULTY = "Faulty" + SUPPRESSED = "Suppressed" DatasetStatusType = sqlalchemy.types.Enum( @@ -47,22 +48,20 @@ class Attribute(LoadAble, Base): __tablename__ = 'attribute' attribute_id = Column(Integer, primary_key=True) - name = Column(String(128), nullable=False) + name = Column(String(128), nullable=False, unique=True) label = Column(String(128), nullable=False) description = Column(String(255)) required = Column(TINYINT(1), nullable=False, default=0) type = Column(Enum('string', 'percent', 'float', 'integer', 'bp', 'number'), 
server_default=text("'string'")) # One to many relationships - # attribute_id within dataset attribute dataset_attributes = relationship("DatasetAttribute", back_populates='attribute') - # many to one relationships class Dataset(LoadAble, Base): __tablename__ = 'dataset' dataset_id = Column(Integer, primary_key=True) - dataset_uuid = Column(String(32), nullable=False, unique=True, default=str(uuid.uuid4)) + dataset_uuid = Column(String(40), nullable=False, unique=True, default=str(uuid.uuid4)) dataset_type_id = Column(ForeignKey('dataset_type.dataset_type_id'), nullable=False, index=True) name = Column(String(128), nullable=False) version = Column(String(128)) @@ -70,19 +69,15 @@ class Dataset(LoadAble, Base): dataset_source_id = Column(ForeignKey('dataset_source.dataset_source_id'), nullable=False, index=True) label = Column(String(128), nullable=False) status = Column(DatasetStatusType, server_default=text('Submitted')) + parent_id = Column(Integer, ForeignKey('dataset.dataset_id'), nullable=True, index=True) # One to many relationships - # dataset_id to dataset attribute and genome dataset dataset_attributes = relationship("DatasetAttribute", back_populates='dataset', cascade="all, delete, delete-orphan") genome_datasets = relationship("GenomeDataset", back_populates='dataset', cascade="all, delete, delete-orphan") # many to one relationships - # dataset_type_id to dataset_type dataset_type = relationship('DatasetType', back_populates="datasets") - # dataset_source_id to dataset source dataset_source = relationship('DatasetSource', back_populates="datasets") - # parent dataset when created - parent_id = Column(Integer, ForeignKey('dataset.dataset_id'), nullable=True, index=True) children = relationship('Dataset', backref=backref("parent", remote_side=[dataset_id])) @property @@ -118,12 +113,8 @@ class DatasetAttribute(LoadAble, Base): value = Column(String(255), nullable=False) attribute_id = Column(ForeignKey('attribute.attribute_id'), nullable=False, 
index=True) dataset_id = Column(ForeignKey('dataset.dataset_id'), nullable=False, index=True) - # One to many relationships - # none # many to one relationships - # dataset_attribute_id to dataset attribute = relationship('Attribute', back_populates="dataset_attributes") - # attribute_id to attribute dataset = relationship('Dataset', back_populates="dataset_attributes") @@ -133,27 +124,18 @@ class DatasetSource(LoadAble, Base): dataset_source_id = Column(Integer, primary_key=True) type = Column(String(32), nullable=False) name = Column(String(255), nullable=False, unique=True) + location = Column(String(120)) # One to many relationships - # dataset_source_id to dataset datasets = relationship('Dataset', back_populates='dataset_source') - # many to one relationships - # none - class DatasetType(LoadAble, Base): __tablename__ = 'dataset_type' dataset_type_id = Column(Integer, primary_key=True) - name = Column(String(32), nullable=False) + name = Column(String(32), nullable=False, unique=True) label = Column(String(128), nullable=False) topic = Column(String(32), nullable=False) description = Column(String(255)) - details_uri = Column(String(255)) parent = Column(ForeignKey('dataset_type.dataset_type_id'), name='parent_id', nullable=True, index=True) - depends_on = Column(String(128), default=None) - filter_on = Column(JSON, default=None) # One to many relationships - # dataset_type_id to dataset datasets = relationship('Dataset', back_populates='dataset_type') - # many to one relationships - # none diff --git a/src/ensembl/production/metadata/api/models/genome.py b/src/ensembl/production/metadata/api/models/genome.py index 57f55a93..79a813cd 100644 --- a/src/ensembl/production/metadata/api/models/genome.py +++ b/src/ensembl/production/metadata/api/models/genome.py @@ -12,13 +12,13 @@ import logging import uuid -from sqlalchemy import Column, Integer, String, ForeignKey, UniqueConstraint +from sqlalchemy import Column, Integer, String, ForeignKey, UniqueConstraint, 
Enum from sqlalchemy.dialects.mysql import DATETIME, TINYINT from sqlalchemy.orm import relationship from ensembl.production.metadata.api.models.base import Base, LoadAble -__all__ = ['Genome', 'GenomeDataset', 'GenomeRelease'] +__all__ = ["Genome", "GenomeDataset", "GenomeRelease", "GenomeGroup", "GenomeGroupMember"] logger = logging.getLogger(__name__) @@ -31,14 +31,21 @@ class Genome(LoadAble, Base): assembly_id = Column(ForeignKey("assembly.assembly_id"), nullable=False, index=True) organism_id = Column(ForeignKey("organism.organism_id"), nullable=False, index=True) created = Column(DATETIME(fsp=6), nullable=False) - is_best = Column(TINYINT(1), nullable=False, default=0) - production_name = Column(String(255), nullable=False, unique=False) genebuild_version = Column(String(64), nullable=False, unique=False) + production_name = Column(String(120), nullable=False, unique=False) + annotation_source = Column(String(120), nullable=False, unique=False) genebuild_date = Column(String(20), nullable=False, unique=False) + suppressed = Column(TINYINT(1), nullable=False, default=0) + suppression_details = Column(String(255), nullable=True, unique=False) + url_name = Column(String(128), nullable=True, unique=False) # One to many relationships # genome_id to genome_dataset and genome release - genome_datasets = relationship("GenomeDataset", back_populates="genome", cascade="all, delete, delete-orphan") - genome_releases = relationship("GenomeRelease", back_populates="genome", cascade="all, delete, delete-orphan") + genome_datasets = relationship( + "GenomeDataset", back_populates="genome", cascade="all, delete, delete-orphan" + ) + genome_releases = relationship( + "GenomeRelease", back_populates="genome", cascade="all, delete, delete-orphan" + ) # many to one relationships # assembly_id to assembly assembly = relationship("Assembly", back_populates="genomes") @@ -46,7 +53,6 @@ class Genome(LoadAble, Base): organism = relationship("Organism", back_populates="genomes") - 
class GenomeDataset(LoadAble, Base): __tablename__ = "genome_dataset" @@ -58,12 +64,17 @@ class GenomeDataset(LoadAble, Base): UniqueConstraint("genome_id", "dataset_id", "release_id", name="genome_dataset_release_uidx"), # genome_dataset_id to genome - dataset = relationship("Dataset", back_populates="genome_datasets", order_by='Dataset.name, desc(Dataset.created)') + dataset = relationship( + "Dataset", back_populates="genome_datasets", order_by="Dataset.name, desc(Dataset.created)" + ) # genome_id to genome - genome = relationship("Genome", back_populates="genome_datasets", order_by='Dataset.name, desc(Genome.created)') + genome = relationship( + "Genome", back_populates="genome_datasets", order_by="Dataset.name, desc(Genome.created)" + ) # release_id to release - ensembl_release = relationship("EnsemblRelease", back_populates="genome_datasets", - order_by='desc(EnsemblRelease.version)') + ensembl_release = relationship( + "EnsemblRelease", back_populates="genome_datasets", order_by="desc(EnsemblRelease.version)" + ) class GenomeRelease(LoadAble, Base): @@ -74,10 +85,39 @@ class GenomeRelease(LoadAble, Base): genome_id = Column(ForeignKey("genome.genome_id"), nullable=False, index=True) release_id = Column(ForeignKey("ensembl_release.release_id"), nullable=False, index=True) is_current = Column(TINYINT(1), nullable=False, default=0) - # One to many relationships - # none # many to one relationships # genome_release_id to genome_release genome = relationship("Genome", back_populates="genome_releases") # release_id to ensembl release ensembl_release = relationship("EnsemblRelease", back_populates="genome_releases") + + +class GenomeGroup(LoadAble, Base): + __tablename__ = "genome_group" + + genome_group_id = Column(Integer, primary_key=True) + type = Column(Enum("compara_reference", "structural_variant", "project"), nullable=False) + name = Column(String(128), nullable=False, unique=True) + label = Column(String(128)) + searchable = Column(TINYINT(1), 
nullable=False, default=0) + description = Column(String(255)) + + # One to many relationships + # genome_group_id to organism_group_member + genome_group_members = relationship("GenomeGroupMember", back_populates="genome_group") + + +class GenomeGroupMember(LoadAble, Base): + __tablename__ = "genome_group_member" + + genome_group_member_id = Column(Integer, primary_key=True) + is_reference = Column(TINYINT(1), nullable=False, default=0) + genome_id = Column(ForeignKey("genome.genome_id"), nullable=False) + genome_group_id = Column(ForeignKey("genome_group.genome_group_id"), nullable=False) + release_id = Column(ForeignKey("ensembl_release.release_id")) + is_current = Column(TINYINT(1), nullable=False, default=0) + + # many to one relationships + genome_group = relationship("GenomeGroup", back_populates="genome_group_members") + genome = relationship("Genome", back_populates="genome_group_members") + release = relationship("Release", back_populates="genome_group_members") diff --git a/src/ensembl/production/metadata/api/models/organism.py b/src/ensembl/production/metadata/api/models/organism.py index 92bdb110..46474fa9 100644 --- a/src/ensembl/production/metadata/api/models/organism.py +++ b/src/ensembl/production/metadata/api/models/organism.py @@ -17,14 +17,14 @@ from ensembl.production.metadata.api.models.base import Base, LoadAble -__all__ = ['Organism', 'OrganismGroup', 'OrganismGroupMember'] +__all__ = ["Organism", "OrganismGroup", "OrganismGroupMember"] class Organism(LoadAble, Base): __tablename__ = "organism" organism_id = Column(Integer, primary_key=True) - organism_uuid = Column(String(32), unique=True, nullable=False, default=uuid.uuid4) + organism_uuid = Column(String(40), unique=True, nullable=False, default=uuid.uuid4) taxonomy_id = Column(Integer, nullable=False) species_taxonomy_id = Column(Integer) common_name = Column(String(128), nullable=True) @@ -32,37 +32,36 @@ class Organism(LoadAble, Base): scientific_name = Column(String(128)) biosample_id 
= Column(String(128), nullable=False, unique=True) scientific_parlance_name = Column(String(255)) + rank = Column(Integer, default=0) + strain_type = Column(String(128), nullable=True, unique=False) # One to many relationships # Organism_id to organism_group_member and genome genomes = relationship("Genome", back_populates="organism", cascade="all, delete, delete-orphan") organism_group_members = relationship("OrganismGroupMember", back_populates="organism") - strain_type = Column(String(128), nullable=True, unique=False) - class OrganismGroup(LoadAble, Base): __tablename__ = "organism_group" - __table_args__ = ( - Index("group_type_name_63c2f6ac_uniq", "type", "name", unique=True), - ) + __table_args__ = (Index("group_type_name_63c2f6ac_uniq", "type", "name", unique=True),) organism_group_id = Column(Integer, primary_key=True) - type = Column(String(32), nullable=False) + type = Column(String(32)) name = Column(String(255), nullable=False) code = Column(String(48), unique=True) # One to many relationships # Organism_group_id to organism_group_member organism_group_members = relationship("OrganismGroupMember", back_populates="organism_group") - # many to one relationships - # none - class OrganismGroupMember(LoadAble, Base): __tablename__ = "organism_group_member" __table_args__ = ( - Index("organism_group_member_organism_id_organism_gro_fe8f49ac_uniq", "organism_id", "organism_group_id", - unique=True), + Index( + "organism_group_member_organism_id_organism_gro_fe8f49ac_uniq", + "organism_id", + "organism_group_id", + unique=True, + ), ) organism_group_member_id = Column(Integer, primary_key=True) @@ -70,8 +69,6 @@ class OrganismGroupMember(LoadAble, Base): order = Column(Integer, nullable=True) organism_id = Column(ForeignKey("organism.organism_id"), nullable=False) organism_group_id = Column(ForeignKey("organism_group.organism_group_id"), nullable=False, index=True) - # One to many relationships - # none # many to one relationships # Organism_group_id to 
organism_group_member # organism_id to organism diff --git a/src/ensembl/production/metadata/api/models/release.py b/src/ensembl/production/metadata/api/models/release.py index 449bc2fa..add76964 100644 --- a/src/ensembl/production/metadata/api/models/release.py +++ b/src/ensembl/production/metadata/api/models/release.py @@ -12,7 +12,7 @@ import enum import sqlalchemy -from sqlalchemy import Column, Integer, String, Index, DECIMAL, Date, ForeignKey +from sqlalchemy import Column, Integer, String, Index, DECIMAL, Date, ForeignKey, Enum from sqlalchemy.dialects.mysql import TINYINT from sqlalchemy.orm import relationship @@ -52,15 +52,15 @@ class EnsemblRelease(LoadAble, Base): Index('ensembl_release_version_site_id_b743399a_uniq', 'version', 'site_id', unique=True), ) - release_id = Column(Integer, primary_key=True, nullable=True) + release_id = Column(Integer, primary_key=True) version = Column(DECIMAL(10, 1), nullable=False) - release_date = Column(Date, nullable=True) - label = Column(String(64)) + release_date = Column(Date, nullable=False) + label = Column(String(64), nullable=False) is_current = Column(TINYINT(1), nullable=False, default=0) site_id = Column(ForeignKey('ensembl_site.site_id'), index=True) - release_type = Column(String(16), nullable=False) + release_type = Column(Enum('partial', 'integrated'), nullable=False) status = Column(ReleaseStatusType, nullable=False, default=ReleaseStatus.PLANNED) - name = Column(String(3), nullable=False) + name = Column(String(3)) # One to many relationships # release_id to genome dataset and genome release genome_datasets = relationship('GenomeDataset', back_populates='ensembl_release') From 7c7f14d7cc269617b12c827ccacb1d3c99f52cc7 Mon Sep 17 00:00:00 2001 From: danielp Date: Tue, 14 Oct 2025 12:12:37 +0100 Subject: [PATCH 02/14] Model correction and a couple temporary scripts --- .../metadata/api/models/assembly.py | 2 + .../production/metadata/api/models/base.py | 2 +- src/tests/databases/dump_mysql_db.py | 183 
++++++++++++++ src/tests/databases/load_mysql_db.py | 227 ++++++++++++++++++ 4 files changed, 413 insertions(+), 1 deletion(-) create mode 100644 src/tests/databases/dump_mysql_db.py create mode 100644 src/tests/databases/load_mysql_db.py diff --git a/src/ensembl/production/metadata/api/models/assembly.py b/src/ensembl/production/metadata/api/models/assembly.py index cb6b2421..a2ec8cd0 100644 --- a/src/ensembl/production/metadata/api/models/assembly.py +++ b/src/ensembl/production/metadata/api/models/assembly.py @@ -88,6 +88,8 @@ class AssemblySequence(LoadAble, Base): additional = Column(TINYINT(1), nullable=False, default=0) source = Column(String(128)) assembly = relationship("Assembly", back_populates="assembly_sequences") + sequence_aliases = relationship("SequenceAlias", back_populates="assembly_sequence", + cascade="all, delete, delete-orphan") class SequenceAlias(LoadAble, Base): diff --git a/src/ensembl/production/metadata/api/models/base.py b/src/ensembl/production/metadata/api/models/base.py index 788d6bc3..d9931eb2 100644 --- a/src/ensembl/production/metadata/api/models/base.py +++ b/src/ensembl/production/metadata/api/models/base.py @@ -9,7 +9,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import declarative_base Base = declarative_base() diff --git a/src/tests/databases/dump_mysql_db.py b/src/tests/databases/dump_mysql_db.py new file mode 100644 index 00000000..c4cc1c8f --- /dev/null +++ b/src/tests/databases/dump_mysql_db.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 +""" +Dump MySQL database to table.sql and .txt files. +Creates the same format that load_mysql_db.py expects. 
+""" + +import argparse +import csv +from pathlib import Path +from urllib.parse import urlparse + +import mysql.connector +from mysql.connector import Error + + +def parse_mysql_uri(uri): + """Parse MySQL URI and return connection parameters.""" + parsed = urlparse(uri) + + return { + "host": parsed.hostname, + "port": parsed.port or 3306, + "user": parsed.username, + "password": parsed.password, + "database": parsed.path.lstrip("/"), + } + + +def get_table_create_statement(cursor, table_name): + """Get the CREATE TABLE statement for a table.""" + cursor.execute(f"SHOW CREATE TABLE `{table_name}`") + result = cursor.fetchone() + return result[1] # Second column is the CREATE TABLE statement + + +def dump_schema(cursor, output_dir): + """Dump all table schemas to table.sql.""" + # Get all tables in database + cursor.execute("SHOW TABLES") + tables = [row[0] for row in cursor.fetchall()] + + schema_file = output_dir / "table.sql" + + with open(schema_file, "w", encoding="utf-8") as f: + for table_name in tables: + create_stmt = get_table_create_statement(cursor, table_name) + f.write(create_stmt) + f.write(";\n\n") + + print(f"✓ Exported schema for {len(tables)} tables to {schema_file}") + return tables + + +def dump_table_data(cursor, table_name, output_dir): + """Dump table data to a tab-separated .txt file.""" + output_file = output_dir / f"{table_name}.txt" + + # Get all data from table + cursor.execute(f"SELECT * FROM `{table_name}`") + rows = cursor.fetchall() + + if not rows: + # Create empty file for consistency + output_file.touch() + return 0 + + # Write to TSV file + with open(output_file, "w", encoding="utf-8", newline="") as f: + writer = csv.writer(f, delimiter="\t", lineterminator="\n") + + for row in rows: + # Convert None to \N (MySQL NULL representation) + converted_row = ["\\N" if val is None else str(val) for val in row] + writer.writerow(converted_row) + + return len(rows) + + +def dump_database(mysql_url, output_dir, overwrite=False): + """ + 
Dump MySQL database to table.sql and .txt files. + + Args: + mysql_url: MySQL connection URL (mysql://user:pass@host:port/database) + output_dir: Output directory for schema and data files + overwrite: Whether to overwrite existing directory + """ + output_path = Path(output_dir) + + # Check if output directory exists + if output_path.exists(): + if not overwrite: + print(f"✗ Error: Directory {output_dir} already exists. Use --overwrite to replace it.") + return False + print(f"⚠ Overwriting existing directory: {output_dir}") + else: + output_path.mkdir(parents=True, exist_ok=True) + print(f"✓ Created output directory: {output_dir}") + + # Parse connection parameters + try: + conn_params = parse_mysql_uri(mysql_url) + db_name = conn_params["database"] + + if not db_name: + print("✗ Error: No database specified in URL") + print("Expected format: mysql://user:password@host:port/database_name") + return False + + print(f"\nDumping database: {db_name}") + print(f"MySQL Server: {conn_params['host']}:{conn_params['port']}") + print(f"Output directory: {output_dir}\n") + except Exception as e: + print(f"✗ Error parsing MySQL URI: {e}") + print("Expected format: mysql://user:password@host:port/database_name") + return False + + try: + # Connect to MySQL + conn = mysql.connector.connect(**conn_params) + cursor = conn.cursor() + + # Dump schema + tables = dump_schema(cursor, output_path) + + # Dump data for each table + print("\nExporting table data...") + total_rows = 0 + + for table_name in tables: + rows = dump_table_data(cursor, table_name, output_path) + total_rows += rows + print(f" ✓ {table_name}: {rows} rows") + + cursor.close() + conn.close() + + print(f"\n{'=' * 60}") + print(f"✓ Successfully dumped database: {db_name}") + print(f" - Schema: table.sql") + print(f" - Data: {len(tables)} tables, {total_rows} total rows") + print(f" - Location: {output_path.absolute()}") + return True + + except Error as e: + print(f"✗ MySQL Error: {e}") + return False + except 
Exception as e: + print(f"✗ Error: {e}") + return False + + +def main(): + parser = argparse.ArgumentParser( + description="Dump MySQL database to table.sql and .txt data files", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s mysql://user:pass@host:port/my_database ./output_dir + %(prog)s mysql://ensadmin:ensembl@mysql-server:4508/test_core_1 databases/core_1 + %(prog)s mysql://user:pass@host/testdb ./testdb --overwrite + +The script creates: + - table.sql: Complete schema for all tables + - .txt: Tab-separated data for each table (no headers) + +This format is compatible with load_mysql_db.py for re-importing. + """, + ) + parser.add_argument("mysql_url", help="MySQL connection URL (mysql://user:password@host:port/database)") + parser.add_argument("output_dir", help="Output directory for schema and data files") + parser.add_argument( + "-o", "--overwrite", action="store_true", help="Overwrite output directory if it exists" + ) + + args = parser.parse_args() + + success = dump_database(args.mysql_url, args.output_dir, args.overwrite) + exit(0 if success else 1) + + +if __name__ == "__main__": + main() diff --git a/src/tests/databases/load_mysql_db.py b/src/tests/databases/load_mysql_db.py new file mode 100644 index 00000000..7045c8d8 --- /dev/null +++ b/src/tests/databases/load_mysql_db.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python3 +""" +Load a test database into MySQL from directory structure. +Reads table.sql schema file and tab-separated .txt data files. 
+""" + +import argparse +import csv +from pathlib import Path +from urllib.parse import urlparse + +import mysql.connector +from mysql.connector import Error + + +def parse_mysql_uri(uri): + """Parse MySQL URI and return connection parameters.""" + parsed = urlparse(uri) + + return { + "host": parsed.hostname, + "port": parsed.port or 3306, + "user": parsed.username, + "password": parsed.password, + "database": None, # We'll create databases ourselves + } + + +def create_database(cursor, db_name, drop_existing=False): + """Create database, optionally dropping it first if it exists.""" + try: + if drop_existing: + print(f"Dropping existing database '{db_name}' if it exists...") + cursor.execute(f"DROP DATABASE IF EXISTS `{db_name}`") + print(f"✓ Database dropped") + + cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{db_name}`") + print(f"✓ Database '{db_name}' ready") + return True + except Error as e: + print(f"✗ Error creating database: {e}") + return False + + +def load_schema(cursor, schema_file): + """Load SQL schema from file.""" + with open(schema_file, "r", encoding="utf-8") as f: + schema_sql = f.read() + + # Split into individual statements (handle multi-statement SQL) + statements = [s.strip() for s in schema_sql.split(";") if s.strip()] + + for statement in statements: + try: + cursor.execute(statement) + except Error as e: + print(f"✗ Error executing statement: {e}") + print(f" Statement: {statement[:100]}...") + raise + + print(f"✓ Schema loaded") + + +def get_table_columns(cursor, table_name): + """Get column names for a table.""" + cursor.execute(f"SHOW COLUMNS FROM `{table_name}`") + return [row[0] for row in cursor.fetchall()] + + +def load_table_data(cursor, table_name, txt_file): + """Load data from tab-separated file into table.""" + + # Get column information + columns = get_table_columns(cursor, table_name) + column_count = len(columns) + + # Prepare INSERT statement + placeholders = ",".join(["%s"] * column_count) + insert_sql = f"INSERT INTO 
`{table_name}` VALUES ({placeholders})" + + rows_inserted = 0 + with open(txt_file, "r", encoding="utf-8") as f: + reader = csv.reader(f, delimiter="\t") + + for row in reader: + # Handle MySQL NULL representation and clean data + cleaned_row = [] + for val in row: + if val == "\\N": + cleaned_row.append(None) + else: + # Strip trailing commas and whitespace + cleaned_row.append(val.rstrip(",").strip() if val else val) + + try: + cursor.execute(insert_sql, cleaned_row) + rows_inserted += 1 + except Error as e: + print(f"⚠ Warning: Error inserting row into {table_name}: {e}") + print(f" Row data: {cleaned_row}") + + return rows_inserted + + +def load_database(db_dir, mysql_uri, db_name=None, drop_existing=False): + """Load a database directory into MySQL.""" + db_path = Path(db_dir) + + if not db_path.exists(): + print(f"✗ Error: Directory {db_dir} does not exist") + return False + + if not db_path.is_dir(): + print(f"✗ Error: {db_dir} is not a directory") + return False + + # Use provided database name or default to test_ + if not db_name: + db_name = f"test_{db_path.name}" + + print(f"\nLoading database from: {db_path}") + print(f"Target database name: {db_name}\n") + + # Check for schema file + schema_file = db_path / "table.sql" + if not schema_file.exists(): + print(f"✗ Error: No table.sql found in {db_path}") + return False + + # Parse MySQL connection + try: + connection_params = parse_mysql_uri(mysql_uri) + except Exception as e: + print(f"✗ Error parsing MySQL URI: {e}") + print("Expected format: mysql://user:password@host:port/") + return False + + try: + # Connect to MySQL server + conn = mysql.connector.connect(**connection_params) + cursor = conn.cursor() + + # Create and use database + if not create_database(cursor, db_name, drop_existing): + return False + + cursor.execute(f"USE `{db_name}`") + + # Disable foreign key checks during data load + cursor.execute("SET FOREIGN_KEY_CHECKS=0") + print(f"✓ Foreign key checks disabled") + + # Load schema + 
load_schema(cursor, schema_file) + conn.commit() + + # Load data from all .txt files + txt_files = sorted(db_path.glob("*.txt")) + + if not txt_files: + print(f"⚠ No data files found") + + for txt_file in txt_files: + table_name = txt_file.stem + + # Check if table exists + cursor.execute("SHOW TABLES LIKE %s", (table_name,)) + if not cursor.fetchone(): + print(f"⚠ Table '{table_name}' not found in schema, skipping {txt_file.name}") + continue + + rows = load_table_data(cursor, table_name, txt_file) + conn.commit() + print(f"✓ Loaded {rows} rows into {table_name}") + + # Re-enable foreign key checks + cursor.execute("SET FOREIGN_KEY_CHECKS=1") + print(f"\n✓ Foreign key checks re-enabled") + + cursor.close() + conn.close() + + print(f"\n{'=' * 60}") + print(f"✓ Successfully loaded database: {db_name}") + return True + + except Error as e: + print(f"✗ MySQL Error: {e}") + return False + except Exception as e: + print(f"✗ Error: {e}") + return False + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Load a test database into MySQL from directory structure", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s core_1 mysql://root:password@localhost:3306/ + %(prog)s /path/to/core_1 mysql://user:pass@db.example.com:3306/ + %(prog)s core_1 mysql://root:password@localhost:3306/ --name my_test_db + %(prog)s core_1 mysql://root:password@localhost:3306/ --drop + +The script will create a database named 'test_' by default, +or use the name specified with --name. Use --drop to drop and recreate the +database if it already exists. 
+ """, + ) + parser.add_argument("directory", help="Directory containing table.sql and .txt data files") + parser.add_argument("mysql_uri", help="MySQL connection URI (mysql://user:password@host:port/)") + parser.add_argument("-n", "--name", help="Database name (default: test_)", default=None) + parser.add_argument("-d", "--drop", action="store_true", help="Drop database if it exists before loading") + + args = parser.parse_args() + + # Check if mysql-connector-python is installed + try: + import mysql.connector + except ImportError: + print("✗ Error: mysql-connector-python is not installed") + print("Install it with: pip install mysql-connector-python") + exit(1) + + success = load_database(args.directory, args.mysql_uri, args.name, args.drop) + exit(0 if success else 1) From e505efc014390a3549dad7ecc90af431195f04de Mon Sep 17 00:00:00 2001 From: danielp Date: Tue, 14 Oct 2025 12:18:52 +0100 Subject: [PATCH 03/14] Model correction --- src/ensembl/production/metadata/api/models/assembly.py | 3 +-- src/ensembl/production/metadata/api/models/genome.py | 5 ++++- src/ensembl/production/metadata/api/models/release.py | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/ensembl/production/metadata/api/models/assembly.py b/src/ensembl/production/metadata/api/models/assembly.py index a2ec8cd0..be78e07f 100644 --- a/src/ensembl/production/metadata/api/models/assembly.py +++ b/src/ensembl/production/metadata/api/models/assembly.py @@ -99,5 +99,4 @@ class SequenceAlias(LoadAble, Base): assembly_sequence_id = Column(ForeignKey("assembly_sequence.assembly_sequence_id"), nullable=False) alias = Column(String(128), nullable=False) source = Column(String(128)) - - assembly_sequence = relationship("AssemblySequence", back_populates="sequence_alias") + assembly_sequence = relationship("AssemblySequence", back_populates="sequence_aliases") diff --git a/src/ensembl/production/metadata/api/models/genome.py b/src/ensembl/production/metadata/api/models/genome.py index 
79a813cd..bd2a5e0a 100644 --- a/src/ensembl/production/metadata/api/models/genome.py +++ b/src/ensembl/production/metadata/api/models/genome.py @@ -46,6 +46,9 @@ class Genome(LoadAble, Base): genome_releases = relationship( "GenomeRelease", back_populates="genome", cascade="all, delete, delete-orphan" ) + genome_group_members = relationship( + "GenomeGroupMember", back_populates="genome", cascade="all, delete, delete-orphan" + ) # many to one relationships # assembly_id to assembly assembly = relationship("Assembly", back_populates="genomes") @@ -120,4 +123,4 @@ class GenomeGroupMember(LoadAble, Base): # many to one relationships genome_group = relationship("GenomeGroup", back_populates="genome_group_members") genome = relationship("Genome", back_populates="genome_group_members") - release = relationship("Release", back_populates="genome_group_members") + ensembl_release = relationship("EnsemblRelease", back_populates="genome_group_members") diff --git a/src/ensembl/production/metadata/api/models/release.py b/src/ensembl/production/metadata/api/models/release.py index add76964..dfdda00d 100644 --- a/src/ensembl/production/metadata/api/models/release.py +++ b/src/ensembl/production/metadata/api/models/release.py @@ -65,6 +65,8 @@ class EnsemblRelease(LoadAble, Base): # release_id to genome dataset and genome release genome_datasets = relationship('GenomeDataset', back_populates='ensembl_release') genome_releases = relationship('GenomeRelease', back_populates='ensembl_release') + genome_group_members = relationship('GenomeGroupMember', back_populates='ensembl_release') + # many to one relationships # Added fileter condition on every join to EnsemblSite for code clarity # No other than configure site data should be returned From 76f2a8e7219ea878ac919ce1b670426a658f12ee Mon Sep 17 00:00:00 2001 From: danielp Date: Thu, 16 Oct 2025 09:23:19 +0100 Subject: [PATCH 04/14] Predeletion commit of sqlite testing Includes three scripts for moving txt/sql files to a mysql server 
and dumping them back as sqlite. The conftest.py has options for running with the sqlite db A second commit will follow with deletion of all mysql related testing --- .../metadata/api/adaptors/genome.py | 9 +- .../metadata/api/adaptors/release.py | 175 +++++++--- .../metadata/api/factories/datasets.py | 13 - .../metadata/api/factories/genomes.py | 320 ++++++++++++------ .../metadata/api/factories/release.py | 16 +- .../production/metadata/api/factory.py | 4 +- .../metadata/api/models/assembly.py | 2 +- .../production/metadata/api/models/dataset.py | 2 +- .../production/metadata/api/models/genome.py | 2 +- .../metadata/api/models/organism.py | 2 +- .../metadata/grpc/protobuf_msg_factory.py | 3 +- .../scripts/organism_to_organismgroup.py | 19 +- .../production/metadata/updater/base.py | 4 +- .../production/metadata/updater/core.py | 17 +- src/tests/databases/compara_db.db | Bin 0 -> 8192 bytes src/tests/databases/core_1.db | Bin 0 -> 28672 bytes src/tests/databases/core_1/attrib_type.txt | 4 +- src/tests/databases/core_1/coord_system.txt | 2 +- src/tests/databases/core_1/meta.txt | 17 +- .../databases/core_1/seq_region_attrib.txt | 2 +- src/tests/databases/core_1/table.sql | 133 ++++---- src/tests/databases/core_2.db | Bin 0 -> 28672 bytes src/tests/databases/core_3.db | Bin 0 -> 28672 bytes src/tests/databases/core_4.db | Bin 0 -> 28672 bytes src/tests/databases/core_5.db | Bin 0 -> 28672 bytes src/tests/databases/core_6.db | Bin 0 -> 28672 bytes src/tests/databases/core_7.db | Bin 0 -> 28672 bytes src/tests/databases/core_8.db | Bin 0 -> 28672 bytes src/tests/databases/core_9.db | 0 .../databases/ensembl_genome_metadata.db | Bin 0 -> 262144 bytes src/tests/databases/mysql2sqlite.py | 244 +++++++++++++ src/tests/databases/ncbi_taxonomy.db | Bin 0 -> 32768 bytes .../ncbi_taxonomy/ncbi_taxa_name.txt | 9 +- src/tests/databases/ncbi_taxonomy/table.sql | 3 +- src/tests/test_dataset_factory.py | 8 +- src/tests/test_genome_factory.py | 23 +- src/tests/test_grpc_release.py 
| 13 +- src/tests/test_organism_to_organismgroup.py | 12 +- src/tests/test_protobuf_msg_factory.py | 64 ++-- src/tests/test_release_factory.py | 18 +- src/tests/test_updater.py | 130 +++---- src/tests/test_utils.py | 11 +- 42 files changed, 852 insertions(+), 429 deletions(-) create mode 100644 src/tests/databases/compara_db.db create mode 100644 src/tests/databases/core_1.db create mode 100644 src/tests/databases/core_2.db create mode 100644 src/tests/databases/core_3.db create mode 100644 src/tests/databases/core_4.db create mode 100644 src/tests/databases/core_5.db create mode 100644 src/tests/databases/core_6.db create mode 100644 src/tests/databases/core_7.db create mode 100644 src/tests/databases/core_8.db create mode 100644 src/tests/databases/core_9.db create mode 100644 src/tests/databases/ensembl_genome_metadata.db create mode 100644 src/tests/databases/mysql2sqlite.py create mode 100644 src/tests/databases/ncbi_taxonomy.db diff --git a/src/ensembl/production/metadata/api/adaptors/genome.py b/src/ensembl/production/metadata/api/adaptors/genome.py index a4d85d03..cbee9da8 100644 --- a/src/ensembl/production/metadata/api/adaptors/genome.py +++ b/src/ensembl/production/metadata/api/adaptors/genome.py @@ -159,7 +159,7 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ Args: genome_id (Union[int, List[int]]): The ID(s) of the genome(s) to fetch. genome_uuid str|None: The UUID of the genome to fetch. - genome_tag (Union[str, List[str]]): genome_tag value is either in Assembly.url_name or told_id. + genome_tag (Union[str, List[str]]): genome_tag value is genome.url_name organism_uuid (Union[str, List[str]]): The UUID(s) of the organism(s) to fetch. assembly_uuid (Union[str, List[str]]): The UUID(s) of the assembly(s) to fetch. assembly_accession (Union[str, List[str]]): The assenbly accession of the assembly(s) to fetch. 
@@ -229,12 +229,7 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ genome_select = genome_select.filter(Genome.genome_uuid == genome_uuid) if genome_tag is not None: - genome_select = genome_select.filter( - db.or_( - Assembly.url_name.in_(genome_tag), - Assembly.tol_id.in_(genome_tag) - ) - ) + genome_select = genome_select.filter(Genome.url_name.in_(genome_tag)) if organism_uuid is not None: genome_select = genome_select.filter(Organism.organism_uuid.in_(organism_uuid)) diff --git a/src/ensembl/production/metadata/api/adaptors/release.py b/src/ensembl/production/metadata/api/adaptors/release.py index b1d5ec65..a2f42403 100644 --- a/src/ensembl/production/metadata/api/adaptors/release.py +++ b/src/ensembl/production/metadata/api/adaptors/release.py @@ -15,45 +15,91 @@ from typing import List import sqlalchemy as db +from sqlalchemy import and_ -from ensembl.production.metadata.api.models import EnsemblRelease, EnsemblSite, GenomeRelease, Genome, GenomeDataset, \ - Dataset, ReleaseStatus from ensembl.production.metadata.api.adaptors.base import check_parameter, BaseAdaptor, cfg +from ensembl.production.metadata.api.models import ( + EnsemblRelease, + EnsemblSite, + GenomeRelease, + Genome, + GenomeDataset, + Dataset, + ReleaseStatus, +) logger = logging.getLogger(__name__) -def filter_release_status(query, - release_status: str | ReleaseStatus = None): +def filter_release_status(query, release_status: str | ReleaseStatus = None): + """ + Adds EnsemblSite join and filters based on release status and configuration. 
+ + Args: + query: The SQLAlchemy query to filter + release_status: Optional release status to filter by + + Returns: + Modified query with site join and status filters applied + """ logger.debug(f"Allowed unreleased {cfg.allow_unreleased}") query = query.add_columns(EnsemblSite) + if not cfg.allow_unreleased: - query = query.join(EnsemblSite, - EnsemblSite.site_id == EnsemblRelease.site_id & - EnsemblSite.site_id == cfg.ensembl_site_id) \ - .filter(EnsemblRelease.status == ReleaseStatus.RELEASED) + # For released only: use inner join and filter + query = query.join( + EnsemblSite, + and_(EnsemblSite.site_id == EnsemblRelease.site_id, EnsemblSite.site_id == cfg.ensembl_site_id), + ).filter(EnsemblRelease.status == ReleaseStatus.RELEASED) else: - query = query.outerjoin(EnsemblSite, - EnsemblSite.site_id == EnsemblRelease.site_id & - EnsemblSite.site_id == cfg.ensembl_site_id) - # Release status filter only work when unreleased are allowed + # For unreleased allowed: use outer join + query = query.outerjoin( + EnsemblSite, + and_(EnsemblSite.site_id == EnsemblRelease.site_id, EnsemblSite.site_id == cfg.ensembl_site_id), + ) + # Release status filter only works when unreleased are allowed if release_status: if isinstance(release_status, str): release_status = ReleaseStatus(release_status) query = query.filter(EnsemblRelease.status == release_status) + return query +def _ensure_scalar(value): + """ + Ensures a parameter is a scalar value, unwrapping single-element lists. + Handles pytest parametrization edge cases. 
+ + Args: + value: The value to check + + Returns: + Scalar value or None + """ + if value is None: + return None + + # Unwrap single-element lists/tuples (pytest parametrization edge case) + if isinstance(value, (list, tuple)) and len(value) == 1: + value = value[0] + + # If still a list/tuple, return as-is for IN clause handling + return value + + class ReleaseAdaptor(BaseAdaptor): - def fetch_releases(self, - release_id: int | List[int] = None, - release_version: float | List[float] = None, - current_only: bool = False, - site_name: str = None, - release_type: str = None, - release_label: str = None, - release_status: str | ReleaseStatus = None): + def fetch_releases( + self, + release_id: int | List[int] = None, + release_version: float | List[float] = None, + current_only: bool = False, + site_name: str = None, + release_type: str = None, + release_label: str = None, + release_status: str | ReleaseStatus = None, + ): """ Fetches releases based on the provided parameters. @@ -61,7 +107,7 @@ def fetch_releases(self, release_id: release internal id (int or list[int]) release_version (float or list or None): Release version(s) to filter by. current_only (bool): Flag indicating whether to fetch only current releases. - site_name (str): SIte name to filter by. + site_name (str): Site name to filter by. release_type (str): Release type to filter by. release_label (str): Release label to filter by. 
release_status: whether to filter particular release status @@ -71,73 +117,102 @@ def fetch_releases(self, """ release_select = db.select(EnsemblRelease).order_by(EnsemblRelease.version) + # Handle release_id parameter releases_id = check_parameter(release_id) if releases_id is not None: - release_select = release_select.filter( - EnsemblRelease.release_id.in_(releases_id) - ) + release_select = release_select.filter(EnsemblRelease.release_id.in_(releases_id)) - release_version = check_parameter(release_version) - # WHERE ensembl_release.version < version + # Handle release_version parameter + # Ensure it's a scalar for <= comparison, or list for IN clause + release_version = _ensure_scalar(check_parameter(release_version)) if release_version is not None: - release_select = release_select.filter( - EnsemblRelease.version <= release_version - ) - # WHERE ensembl_release.is_current =:is_current_1 + if isinstance(release_version, (list, tuple)): + # Multiple versions: use IN clause + release_select = release_select.filter(EnsemblRelease.version.in_(release_version)) + else: + # Single version: use <= comparison + # Convert to float to ensure type compatibility with SQLite + release_version = float(release_version) + release_select = release_select.filter(EnsemblRelease.version <= release_version) + + # Filter for current releases only if current_only: - release_select = release_select.filter( - EnsemblRelease.is_current == 1 - ) + release_select = release_select.filter(EnsemblRelease.is_current == 1) - # WHERE ensembl_release.release_type = :release_type_1 + # Filter by release type if release_type is not None: - release_select = release_select.filter( - EnsemblRelease.release_type.in_(release_type) - ) + release_type = check_parameter(release_type) + release_select = release_select.filter(EnsemblRelease.release_type.in_(release_type)) + # Filter by release label if release_label is not None: - release_select = release_select.filter( - 
EnsemblRelease.label.in_(release_label) - ) + release_label = check_parameter(release_label) + release_select = release_select.filter(EnsemblRelease.label.in_(release_label)) + # Filter by site name (requires site join, so must come before filter_release_status) if site_name is not None: - release_select = release_select.filter( - EnsemblSite.name.in_(site_name) - ) + site_name = check_parameter(site_name) + release_select = release_select.filter(EnsemblSite.name.in_(site_name)) - release_select = release_select.filter( - EnsemblSite.site_id == cfg.ensembl_site_id - ) + # Add site join and status filters + # NOTE: This already handles the site_id == cfg.ensembl_site_id filter release_select = filter_release_status(release_select, release_status) + logger.debug("Query: %s ", release_select) + with self.metadata_db.session_scope() as session: session.expire_on_commit = False return session.execute(release_select).all() def fetch_releases_for_genome(self, genome_uuid): + """ + Fetches releases associated with a specific genome. + + Args: + genome_uuid: The UUID of the genome + + Returns: + list: A list of releases for the genome + """ select_released = db.select(EnsemblRelease).join(GenomeRelease) + if not cfg.allow_unreleased: select_released = select_released.filter(EnsemblRelease.status == ReleaseStatus.RELEASED) + select_released = select_released.join(Genome).where(Genome.genome_uuid == genome_uuid) select_released = filter_release_status(select_released) logger.debug("Query: %s ", select_released) + with self.metadata_db.session_scope() as session: session.expire_on_commit = False releases = session.execute(select_released).all() return releases def fetch_releases_for_dataset(self, dataset_uuid): - select_released = db.select(EnsemblRelease) \ - .select_from(Dataset) \ - .join(GenomeDataset) \ - .join(EnsemblRelease) \ + """ + Fetches releases associated with a specific dataset. 
+ + Args: + dataset_uuid: The UUID of the dataset + + Returns: + list: A list of releases for the dataset + """ + select_released = ( + db.select(EnsemblRelease) + .select_from(Dataset) + .join(GenomeDataset) + .join(EnsemblRelease) .where(Dataset.dataset_uuid == dataset_uuid) + ) if not cfg.allow_unreleased: select_released = select_released.filter(EnsemblRelease.status == ReleaseStatus.RELEASED) + select_released = filter_release_status(select_released) logger.debug("Query: %s ", select_released) + with self.metadata_db.session_scope() as session: session.expire_on_commit = False releases = session.execute(select_released).all() diff --git a/src/ensembl/production/metadata/api/factories/datasets.py b/src/ensembl/production/metadata/api/factories/datasets.py index aa5c75c0..faa0d058 100644 --- a/src/ensembl/production/metadata/api/factories/datasets.py +++ b/src/ensembl/production/metadata/api/factories/datasets.py @@ -686,15 +686,6 @@ def query_all_child_datasets(self, parent_dataset_uuid, session=None): all_child_datasets.extend(sub_children) return all_child_datasets - def __query_depends_on(self, session, dataset_uuid): - dataset = session.query(Dataset).filter(Dataset.dataset_uuid == dataset_uuid).one_or_none() - dataset_type = dataset.dataset_type - dependent_types = dataset_type.depends_on.split(',') if dataset_type.depends_on else [] - dependent_datasets_info = [] - for dtype in dependent_types: - new_uuid, new_status = self.__query_related_genome_by_type(session, dataset_uuid, dtype) - dependent_datasets_info.append((new_uuid, new_status)) - return dependent_datasets_info def __update_status(self, session, dataset_uuid, status): # Processed to Released. Only accept top level. Check that all assembly and genebuild datsets (all the way down) are processed. @@ -720,10 +711,6 @@ def __update_status(self, session, dataset_uuid, status): if current_dataset.status == DatasetStatus.RELEASED: # "Released": # and it is not top level. 
return updated_datasets # Check the dependents - dependents = self.__query_depends_on(session, dataset_uuid) - for uuid, dep_status in dependents: - if dep_status not in (DatasetStatus.PROCESSED, DatasetStatus.RELEASED): # ("Processed", "Released"): - return updated_datasets current_dataset.status = DatasetStatus.PROCESSING # "Processing" parent_uuid, parent_status = self.__query_parent_datasets(session, dataset_uuid) if parent_uuid is not None: diff --git a/src/ensembl/production/metadata/api/factories/genomes.py b/src/ensembl/production/metadata/api/factories/genomes.py index 9f7f4cf6..ffeaf42f 100644 --- a/src/ensembl/production/metadata/api/factories/genomes.py +++ b/src/ensembl/production/metadata/api/factories/genomes.py @@ -11,9 +11,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -''' +""" Fetch Genome Info From New Metadata Database -''' +""" import argparse import json @@ -30,7 +30,7 @@ from ensembl.production.metadata.api.models.genome import Genome, GenomeDataset, GenomeRelease from ensembl.production.metadata.api.models.organism import Organism, OrganismGroup, OrganismGroupMember -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) @@ -47,21 +47,56 @@ class GenomeInputFilters: release_id: int = 0 batch_size: int = 50 page: int = 1 - organism_group_type: str = '' + organism_group_type: str = "" run_all: int = 0 update_dataset_status: str = "" update_dataset_attribute: dict = field(default_factory=lambda: {}) - columns: List = field(default_factory=lambda: [Genome.genome_uuid.label('genome_uuid'), - Genome.production_name.label('species'), - Dataset.dataset_uuid.label('dataset_uuid'), - Dataset.status.label('dataset_status'), - DatasetSource.name.label('dataset_source'), - 
DatasetType.name.label('dataset_type') - ]) + columns: List = field( + default_factory=lambda: [ + Genome.genome_uuid.label("genome_uuid"), + Genome.production_name.label("species"), + Dataset.dataset_uuid.label("dataset_uuid"), + Dataset.status.label("dataset_status"), + DatasetSource.name.label("dataset_source"), + DatasetType.name.label("dataset_type"), + ] + ) @dataclass class GenomeFactory: + + @staticmethod + def _normalize_status_to_enum(status_list): + """ + Convert a list of status strings to DatasetStatus enum values. + This ensures compatibility between SQLite and MySQL. + + Args: + status_list: List of status strings or enums + + Returns: + List of DatasetStatus enum values + """ + if not status_list: + return [] + + normalized = [] + for status in status_list: + if isinstance(status, DatasetStatus): + # Already an enum + normalized.append(status) + elif isinstance(status, str): + # Convert string to enum + try: + normalized.append(DatasetStatus(status)) + except ValueError: + logger.warning(f"Invalid status value: {status}") + else: + logger.warning(f"Unexpected status type: {type(status)} for value {status}") + + return normalized + @staticmethod def _apply_filters(query, filters): @@ -70,12 +105,12 @@ def _apply_filters(query, filters): if filters.run_all: filters.division = [ - 'EnsemblBacteria', - 'EnsemblVertebrates', - 'EnsemblPlants', - 'EnsemblProtists', - 'EnsemblMetazoa', - 'EnsemblFungi', + "EnsemblBacteria", + "EnsemblVertebrates", + "EnsemblPlants", + "EnsemblProtists", + "EnsemblMetazoa", + "EnsemblFungi", ] if filters.genome_uuid: @@ -87,9 +122,11 @@ def _apply_filters(query, filters): if filters.division: ensembl_divisions = filters.division - if filters.organism_group_type == 'DIVISION': - pattern = re.compile(r'^(ensembl)?', re.IGNORECASE) - ensembl_divisions = ['Ensembl' + pattern.sub('', d).capitalize() for d in ensembl_divisions if d] + if filters.organism_group_type == "DIVISION": + pattern = re.compile(r"^(ensembl)?", 
re.IGNORECASE) + ensembl_divisions = [ + "Ensembl" + pattern.sub("", d).capitalize() for d in ensembl_divisions if d + ] query = query.filter(OrganismGroup.name.in_(ensembl_divisions)) @@ -106,129 +143,220 @@ def _apply_filters(query, filters): if filters.release_id: query = query.join(Genome.genome_releases) - query = query.filter(GenomeDataset.release_id==filters.release_id) - query = query.filter(GenomeRelease.release_id==filters.release_id) + query = query.filter(GenomeDataset.release_id == filters.release_id) + query = query.filter(GenomeRelease.release_id == filters.release_id) if filters.dataset_type: - query = query.filter(Genome.genome_datasets.any(DatasetType.name.in_([filters.dataset_type]))) + query = query.filter(DatasetType.name == filters.dataset_type) if filters.dataset_status: - query = query.filter(Dataset.status.in_(filters.dataset_status)) + status_enums = GenomeFactory._normalize_status_to_enum(filters.dataset_status) + if status_enums: + query = query.filter(Dataset.status.in_(status_enums)) + else: + logger.warning(f"No valid status values to filter on: {filters.dataset_status}") if filters.batch_size: filters.page = filters.page if filters.page > 0 else 1 query = query.offset((filters.page - 1) * filters.batch_size).limit(filters.batch_size) + logger.debug(f"Filter Query {query}") return query def _build_query(self, filters): - query = select(*filters.columns) \ - .select_from(Genome) \ - .join(Genome.assembly) \ - .join(Genome.organism) \ - .join(Organism.organism_group_members) \ - .join(OrganismGroupMember.organism_group) \ - .join(Genome.genome_datasets) \ - .join(GenomeDataset.dataset) \ - .join(Dataset.dataset_source) \ - .join(Dataset.dataset_type) \ - .group_by(Genome.genome_id, Dataset.dataset_id) \ + query = ( + select(*filters.columns) + .select_from(Genome) + .join(Genome.assembly) + .join(Genome.organism) + .join(Organism.organism_group_members) + .join(OrganismGroupMember.organism_group) + .join(Genome.genome_datasets) + 
.join(GenomeDataset.dataset) + .join(Dataset.dataset_source) + .join(Dataset.dataset_type) + .group_by(Genome.genome_id, Dataset.dataset_id) .order_by(Genome.genome_uuid) + ) return self._apply_filters(query, filters) def get_genomes(self, **filters: GenomeInputFilters): filters = GenomeInputFilters(**filters) - logger.info(f'Get Genomes with filters {filters}') + logger.info(f"Get Genomes with filters {filters}") with DBConnection(filters.metadata_db_uri).session_scope() as session: query = self._build_query(filters) - logger.info(f'Executing SQL query: {query}') - for genome in session.execute(query).fetchall(): + logger.info(f"Executing SQL query: {query}") + + results = session.execute(query).fetchall() + logger.debug(f"Query returned {len(results)} results") + + for genome in results: genome_info = genome._asdict() - dataset_uuid = genome_info.get('dataset_uuid', None) + dataset_uuid = genome_info.get("dataset_uuid", None) - # convert status enum object to string value - dataset_status = genome_info.get('dataset_status', None) + dataset_status = genome_info.get("dataset_status", None) if dataset_status and isinstance(dataset_status, DatasetStatus): - genome_info['dataset_status'] = dataset_status.value + genome_info["dataset_status"] = dataset_status.value if not dataset_uuid: - logger.warning( - f"No dataset uuid found for genome {genome_info} skipping this genome " - ) + logger.warning(f"No dataset uuid found for genome {genome_info} skipping this genome ") continue if filters.update_dataset_status: - _, status = DatasetFactory(filters.metadata_db_uri) \ - .update_dataset_status(dataset_uuid, - filters.update_dataset_status, - session=session) - if filters.update_dataset_status == status.value: + update_status = filters.update_dataset_status + if isinstance(update_status, str): + try: + update_status_enum = DatasetStatus(update_status) + except ValueError: + logger.error(f"Invalid update_dataset_status: {update_status}") + 
genome_info["updated_dataset_status"] = None + yield genome_info + continue + else: + update_status_enum = update_status + + _, status = DatasetFactory(filters.metadata_db_uri).update_dataset_status( + dataset_uuid, update_status_enum.value, session=session + ) + + if update_status_enum == status: logger.info( f"Updated Dataset status for dataset uuid: {dataset_uuid} from " - f"{filters.update_dataset_status} to {status} for genome {genome_info['genome_uuid']}" + f"{genome_info.get('dataset_status')} to {status.value} " + f"for genome {genome_info['genome_uuid']}" ) - genome_info['updated_dataset_status'] = status.value - + genome_info["updated_dataset_status"] = status.value else: logger.warning( f"Cannot update status for dataset uuid: {dataset_uuid} " - f"{filters.update_dataset_status} to {status} for genome {genome_info['genome_uuid']}" + f"from {genome_info.get('dataset_status')} to {status.value} " + f"for genome {genome_info['genome_uuid']}" ) - genome_info['updated_dataset_status'] = None + genome_info["updated_dataset_status"] = None + session.flush() yield genome_info def main(): parser = argparse.ArgumentParser( - prog='genomes.py', - description='Fetch Ensembl genome info from the new metadata database' + prog="genomes.py", description="Fetch Ensembl genome info from the new metadata database" + ) + parser.add_argument( + "--genome_uuid", + type=str, + nargs="*", + default=[], + required=False, + help="List of genome UUIDs to filter the query. Default is an empty list.", + ) + parser.add_argument( + "--dataset_uuid", + type=str, + nargs="*", + default=[], + required=False, + help="List of dataset UUIDs to filter the query. Default is an empty list.", + ) + parser.add_argument( + "--organism_group_type", + type=str, + default="DIVISION", + required=False, + help='Organism group type to filter the query. 
Default is "DIVISION"', + ) + parser.add_argument( + "--division", + type=str, + nargs="*", + default=[], + required=False, + help="List of organism group names to filter the query. Default is an empty list.", + ) + parser.add_argument( + "--dataset_type", + type=str, + default="assembly", + required=False, + help="List of dataset types to filter the query. Default is an empty list.", + ) + parser.add_argument( + "--species", + type=str, + nargs="*", + default=[], + required=False, + help="List of Species Production names to filter the query. Default is an empty list.", + ) + parser.add_argument( + "--antispecies", + type=str, + nargs="*", + default=[], + required=False, + help="List of Species Production names to exclude from the query. Default is an empty list.", + ) + parser.add_argument( + "--release_id", + type=int, + default=0, + required=False, + help="Genome_dataset release_id to filter the query. Default is 0 (no filter).", + ) + parser.add_argument( + "--dataset_status", + nargs="*", + default=["Submitted"], + choices=["Submitted", "Processing", "Processed", "Released"], + required=False, + help="List of dataset statuses to filter the query. Default is an empty list.", + ) + parser.add_argument( + "--update_dataset_status", + type=str, + default="", + required=False, + choices=["Submitted", "Processing", "Processed", "Released", ""], + help="Update the status of the selected datasets to the specified value. ", + ) + parser.add_argument( + "--batch_size", + type=int, + default=50, + required=False, + help="Number of results to retrieve per batch. Default is 50.", + ) + parser.add_argument( + "--page", + default=1, + required=False, + type=lambda x: int(x) if int(x) > 0 else argparse.ArgumentTypeError("{x} is not a positive integer"), + help="The page number for pagination. 
Default is 1.", + ) + parser.add_argument( + "--metadata_db_uri", + type=str, + required=True, + help="metadata db mysql uri, ex: mysql://ensro@localhost:3366/ensembl_genome_metadata", ) - parser.add_argument('--genome_uuid', type=str, nargs='*', default=[], required=False, - help='List of genome UUIDs to filter the query. Default is an empty list.') - parser.add_argument('--dataset_uuid', type=str, nargs='*', default=[], required=False, - help='List of dataset UUIDs to filter the query. Default is an empty list.') - parser.add_argument('--organism_group_type', type=str, default='DIVISION', required=False, - help='Organism group type to filter the query. Default is "DIVISION"') - parser.add_argument('--division', type=str, nargs='*', default=[], required=False, - help='List of organism group names to filter the query. Default is an empty list.') - parser.add_argument('--dataset_type', type=str, default="assembly", required=False, - help='List of dataset types to filter the query. Default is an empty list.') - parser.add_argument('--species', type=str, nargs='*', default=[], required=False, - help='List of Species Production names to filter the query. Default is an empty list.') - parser.add_argument('--antispecies', type=str, nargs='*', default=[], required=False, - help='List of Species Production names to exclude from the query. Default is an empty list.') - parser.add_argument('--release_id', type=int, default=0, required=False, - help='Genome_dataset release_id to filter the query. Default is 0 (no filter).') - parser.add_argument('--dataset_status', nargs='*', default=["Submitted"], - choices=['Submitted', 'Processing', 'Processed', 'Released'], required=False, - help='List of dataset statuses to filter the query. 
Default is an empty list.') - parser.add_argument('--update_dataset_status', type=str, default="", required=False, - choices=['Submitted', 'Processing', 'Processed', 'Released', ''], - help='Update the status of the selected datasets to the specified value. ') - parser.add_argument('--batch_size', type=int, default=50, required=False, - help='Number of results to retrieve per batch. Default is 50.') - parser.add_argument('--page', default=1, required=False, - type=lambda x: int(x) if int(x) > 0 else argparse.ArgumentTypeError( - "{x} is not a positive integer"), - help='The page number for pagination. Default is 1.') - parser.add_argument('--metadata_db_uri', type=str, required=True, - help='metadata db mysql uri, ex: mysql://ensro@localhost:3366/ensembl_genome_metadata') - parser.add_argument('--output', type=str, required=True, help='output file ex: genome_info.json') + parser.add_argument("--output", type=str, required=True, help="output file ex: genome_info.json") args = parser.parse_args() meta_details = re.match(r"mysql:\/\/.*:?(.*?)@(.*?):\d+\/(.*)", args.metadata_db_uri) - with open(args.output, 'w') as json_output: - logger.info(f'Connecting Metadata Database with host:{meta_details.group(2)} & dbname:{meta_details.group(3)}') + with open(args.output, "w") as json_output: + logger.info( + f"Connecting Metadata Database with host:{meta_details.group(2)} & dbname:{meta_details.group(3)}" + ) genome_fetcher = GenomeFactory() - logger.info(f'Writing Results to {args.output}') - for genome in genome_fetcher.get_genomes( + logger.info(f"Writing Results to {args.output}") + for genome in ( + genome_fetcher.get_genomes( metadata_db_uri=args.metadata_db_uri, update_dataset_status=args.update_dataset_status, genome_uuid=args.genome_uuid, @@ -241,13 +369,15 @@ def main(): batch_size=args.batch_size, release_id=args.release_id, dataset_status=args.dataset_status, - ) or []: + ) + or [] + ): json.dump(genome, json_output) json_output.write("\n") - 
logger.info(f'Completed !') + logger.info(f"Completed !") if __name__ == "__main__": - logger.info('Fetching Genome Information From New Metadata Database') + logger.info("Fetching Genome Information From New Metadata Database") main() diff --git a/src/ensembl/production/metadata/api/factories/release.py b/src/ensembl/production/metadata/api/factories/release.py index 9d4683f2..56793c71 100644 --- a/src/ensembl/production/metadata/api/factories/release.py +++ b/src/ensembl/production/metadata/api/factories/release.py @@ -86,11 +86,21 @@ def init_release( version = round(version, 1) # Validate release date only if provided + release_date_obj = None if release_date: try: - datetime.strptime(release_date, "%Y-%m-%d").date() + release_date_obj = datetime.strptime(release_date, "%Y-%m-%d").date() except ValueError: raise ValueError("Invalid release_date format. Expected YYYY-MM-DD.") + else: + if label: + try: + release_date_obj = datetime.strptime(label, "%Y-%m-%d").date() + release_date = label # Store the string for later label assignment + except ValueError: + raise ValueError("Invalid label format. Expected YYYY-MM-DD when used as date.") + else: + raise ValueError("Either release_date or label must be specified.") # Create a name if not provided. It should be one higher than any existing partial release. 
if not name and release_type == "partial": @@ -100,8 +110,6 @@ def init_release( # Ensure label is defined if label is None: - if release_date is None: - raise ValueError("Either release_date or label must be specified.") label = release_date # Validate release type @@ -115,7 +123,7 @@ def init_release( # Create and store the new release release = EnsemblRelease( version=version, - release_date=release_date, # Will be stored as NULL if None + release_date=release_date_obj, label=label, ensembl_site=site_obj, release_type=release_type, diff --git a/src/ensembl/production/metadata/api/factory.py b/src/ensembl/production/metadata/api/factory.py index 98191434..da6cb786 100644 --- a/src/ensembl/production/metadata/api/factory.py +++ b/src/ensembl/production/metadata/api/factory.py @@ -16,7 +16,7 @@ from ensembl.production.metadata.updater.core import CoreMetaUpdater -def meta_factory(db_uri, metadata_uri, force=False): +def meta_factory(db_uri, metadata_uri, taxonomy_uri): db_url = make_url(db_uri) if '_compara_' in db_url.database: raise Exception("compara not implemented yet") @@ -30,7 +30,7 @@ def meta_factory(db_uri, metadata_uri, force=False): elif '_funcgen_' in db_url.database: raise Exception("funcgen not implemented yet") elif '_core_' in db_url.database: - return CoreMetaUpdater(db_uri, metadata_uri) + return CoreMetaUpdater(db_uri, metadata_uri, taxonomy_uri) elif '_otherfeatures_' in db_url.database: raise Exception("otherfeatures not implemented yet") elif '_rnaseq_' in db_url.database: diff --git a/src/ensembl/production/metadata/api/models/assembly.py b/src/ensembl/production/metadata/api/models/assembly.py index be78e07f..f1af78d3 100644 --- a/src/ensembl/production/metadata/api/models/assembly.py +++ b/src/ensembl/production/metadata/api/models/assembly.py @@ -24,7 +24,7 @@ class Assembly(LoadAble, Base): __tablename__ = "assembly" assembly_id = Column(Integer, primary_key=True) - assembly_uuid = Column(String(40), unique=True, nullable=False, 
default=uuid.uuid4) + assembly_uuid = Column(String(40), unique=True, nullable=False, default=lambda: str(uuid.uuid4())) ucsc_name = Column(String(16)) accession = Column(String(16), nullable=False, unique=True) level = Column(String(32), nullable=False) diff --git a/src/ensembl/production/metadata/api/models/dataset.py b/src/ensembl/production/metadata/api/models/dataset.py index 186958f5..c71f3b7e 100644 --- a/src/ensembl/production/metadata/api/models/dataset.py +++ b/src/ensembl/production/metadata/api/models/dataset.py @@ -61,7 +61,7 @@ class Dataset(LoadAble, Base): __tablename__ = 'dataset' dataset_id = Column(Integer, primary_key=True) - dataset_uuid = Column(String(40), nullable=False, unique=True, default=str(uuid.uuid4)) + dataset_uuid = Column(String(40), nullable=False, unique=True, default=lambda: str(uuid.uuid4())) dataset_type_id = Column(ForeignKey('dataset_type.dataset_type_id'), nullable=False, index=True) name = Column(String(128), nullable=False) version = Column(String(128)) diff --git a/src/ensembl/production/metadata/api/models/genome.py b/src/ensembl/production/metadata/api/models/genome.py index bd2a5e0a..9cf2d9f1 100644 --- a/src/ensembl/production/metadata/api/models/genome.py +++ b/src/ensembl/production/metadata/api/models/genome.py @@ -27,7 +27,7 @@ class Genome(LoadAble, Base): __tablename__ = "genome" genome_id = Column(Integer, primary_key=True) - genome_uuid = Column(String(32), nullable=False, unique=True, default=str(uuid.uuid4)) + genome_uuid = Column(String(32), nullable=False, unique=True, default=lambda: str(uuid.uuid4())) assembly_id = Column(ForeignKey("assembly.assembly_id"), nullable=False, index=True) organism_id = Column(ForeignKey("organism.organism_id"), nullable=False, index=True) created = Column(DATETIME(fsp=6), nullable=False) diff --git a/src/ensembl/production/metadata/api/models/organism.py b/src/ensembl/production/metadata/api/models/organism.py index 46474fa9..5a06666f 100644 --- 
a/src/ensembl/production/metadata/api/models/organism.py +++ b/src/ensembl/production/metadata/api/models/organism.py @@ -24,7 +24,7 @@ class Organism(LoadAble, Base): __tablename__ = "organism" organism_id = Column(Integer, primary_key=True) - organism_uuid = Column(String(40), unique=True, nullable=False, default=uuid.uuid4) + organism_uuid = Column(String(40), unique=True, nullable=False, default=lambda: str(uuid.uuid4())) taxonomy_id = Column(Integer, nullable=False) species_taxonomy_id = Column(Integer) common_name = Column(String(128), nullable=True) diff --git a/src/ensembl/production/metadata/grpc/protobuf_msg_factory.py b/src/ensembl/production/metadata/grpc/protobuf_msg_factory.py index 8531c5a6..1fa96c05 100644 --- a/src/ensembl/production/metadata/grpc/protobuf_msg_factory.py +++ b/src/ensembl/production/metadata/grpc/protobuf_msg_factory.py @@ -9,10 +9,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging from datetime import datetime from ensembl.production.metadata.grpc import ensembl_metadata_pb2 -import logging logger = logging.getLogger(__name__) @@ -116,7 +116,6 @@ def create_assembly(data=None): ucsc_name=data.Assembly.ucsc_name, ensembl_name=data.Assembly.ensembl_name, is_reference=data.Assembly.is_reference, - url_name=data.Assembly.url_name, tol_id=data.Assembly.tol_id, ) return assembly diff --git a/src/ensembl/production/metadata/scripts/organism_to_organismgroup.py b/src/ensembl/production/metadata/scripts/organism_to_organismgroup.py index c85b1203..ffaf6592 100644 --- a/src/ensembl/production/metadata/scripts/organism_to_organismgroup.py +++ b/src/ensembl/production/metadata/scripts/organism_to_organismgroup.py @@ -1,12 +1,25 @@ -import os +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse import logging +import os from ensembl.core.models import Meta from ensembl.utils.database import DBConnection -from ensembl.production.metadata.api.models.organism import OrganismGroup, OrganismGroupMember, Organism -from ensembl.production.metadata.api.models.genome import Genome, GenomeDataset, GenomeRelease + from ensembl.production.metadata.api.models.dataset import Dataset, DatasetSource +from ensembl.production.metadata.api.models.genome import Genome, GenomeDataset, GenomeRelease +from ensembl.production.metadata.api.models.organism import OrganismGroup, OrganismGroupMember, Organism # Set up the logging configuration logging.basicConfig( diff --git a/src/ensembl/production/metadata/updater/base.py b/src/ensembl/production/metadata/updater/base.py index 35c69f5c..cda3cdf9 100644 --- a/src/ensembl/production/metadata/updater/base.py +++ b/src/ensembl/production/metadata/updater/base.py @@ -18,11 +18,13 @@ class BaseMetaUpdater: - def __init__(self, db_uri, metadata_uri, release=None): + def __init__(self, db_uri, metadata_uri, taxonomy_uri, release=None): self.db_uri = db_uri self.metadata_uri = metadata_uri + self.taxonomy_uri = taxonomy_uri self.db = DBConnection(self.db_uri) self.metadata_db = DBConnection(metadata_uri) + self.taxonomy_db = DBConnection(taxonomy_uri) # We will add a release later. For now, the release must be specified for it to be used. 
if release is None: self.listed_release = None diff --git a/src/ensembl/production/metadata/updater/core.py b/src/ensembl/production/metadata/updater/core.py index 92d7b030..56a42010 100644 --- a/src/ensembl/production/metadata/updater/core.py +++ b/src/ensembl/production/metadata/updater/core.py @@ -35,8 +35,8 @@ class CoreMetaUpdater(BaseMetaUpdater): - def __init__(self, db_uri, metadata_uri, release=None): - super().__init__(db_uri, metadata_uri, release) + def __init__(self, db_uri, metadata_uri, taxonomy_uri, release=None): + super().__init__(db_uri, metadata_uri, taxonomy_uri, release) self.db_type = 'core' # Single query to get all of the metadata information. self.meta_dict = {} @@ -318,7 +318,9 @@ def new_genome(self, meta_session, species_id, organism, assembly, assembly_data production_name = self.get_meta_single_meta_key(species_id, "organism.production_name") genebuild_version = self.get_meta_single_meta_key(species_id, "genebuild.version") genebuild_date = self.get_meta_single_meta_key(species_id, "genebuild.last_geneset_update") - if genebuild_date is None: + url_name = self.get_meta_single_meta_key(species_id, "assembly.url_name") + annotation_source = self.get_meta_single_meta_key(species_id, "genebuild.annotation_source") + if genebuild_date is None: ##TODO Make this so any of the above are none it fails! 
raise exceptions.MetadataUpdateException(f"Unable to parse genebuild.last_geneset_update from meta") # get next release inline to attach the genome to planned_release = get_or_new_release(self.metadata_uri) @@ -329,8 +331,9 @@ def new_genome(self, meta_session, species_id, organism, assembly, assembly_data genebuild_date=genebuild_date, genebuild_version=genebuild_version, created=func.now(), - is_best=0, production_name=production_name, + url_name=url_name, + annotation_source=annotation_source ) logger.debug(f"Assigning genome {new_genome.genome_uuid} to {planned_release.version}") meta_session.add(new_genome) @@ -380,7 +383,7 @@ def get_or_new_organism(self, species_id, meta_session): if taxid is None: raise exceptions.MissingMetaException("organism.taxid is required") if common_name is None: - with self.metadata_db.session_scope() as session: + with self.taxonomy_db.session_scope() as session: common_name = session.query(NCBITaxaName).filter( NCBITaxaName.taxon_id == taxid, NCBITaxaName.name_class == "genbank common name" @@ -416,7 +419,7 @@ def get_or_new_organism(self, species_id, meta_session): # If no existing Organism is found, conduct additional checks before creating a new one. # Check if the new organism's taxonomy ID exists in the taxonomy database. 
- with self.metadata_db.session_scope() as session: + with self.taxonomy_db.session_scope() as session: try: Taxonomy.fetch_node_by_id(session, new_organism.taxonomy_id) except NoResultFound: @@ -572,10 +575,8 @@ def get_or_new_assembly(self, species_id, meta_session, source=None): accession_body=accession_body, assembly_default=self.get_meta_single_meta_key(species_id, "assembly.default"), tol_id=tol_id, - alt_accession=self.get_meta_single_meta_key(species_id, "assembly.alt_accession"), created=func.now(), assembly_uuid=str(uuid.uuid4()), - url_name=self.get_meta_single_meta_key(species_id, "assembly.url_name"), is_reference=is_reference ) dataset_factory = DatasetFactory(self.metadata_uri) diff --git a/src/tests/databases/compara_db.db b/src/tests/databases/compara_db.db new file mode 100644 index 0000000000000000000000000000000000000000..9e05e178a55b0e9b0e1b77cb65cf4949ea57fb38 GIT binary patch literal 8192 zcmeI1&u`;I6vv$|BI&Q}F1x6UD(GCQCa_68cAPjuE8%8CS~g7~$x>CUWKEoDGFtzL zY^Q8bs1gE%gt+q`aOKQ}J6G^cO@O~>iwHyn zA_5VCh(JUjA`lUX2t))T0uh0iPT=!YxUB;R!?C` zO{2%<_qC_hCfeD#Z9sj&X(caNf zwXD^~I>gIDn5TCfr>(=cVV#()V_Ver=X!;349Ds@Hg_1_vy$c*Jl`eE)-Th58{}2AnjFgX zN}7K+e=UxMs@>?h% z()B0|LvjJHE+}9OftY@lgZR$^0s?hsXwIlyQ;@`Sf}gOMz4c5da1xsaBu;QTyU zWp9vJHP)dnBUe9nxgd5RWn)1pqcjI8b0&g+Ed_Hb(kdu<=M6q32^;?`PjaDBFLmVt zlsq{j7J~|7@2-K*_AH;kL50~G9OQw+#r9jbg5pTDP`(FJBkml54? 
zOr}FU)t~EF(!G9*A6p%qR}`&rsHizU{qy2p42$j)sKRJqmqmvaZde}2Lp&Y=(=3`| z7&btFR8S~I*q{p~ZMS#f#)^T=tz%I6KsN}yvvIqm)rJL!r^5??4za!CJ%gH(422oP cf?&81s7a+u)+n?_sTzfn?zHJcRFl&7-^?&Y2LJ#7 literal 0 HcmV?d00001 diff --git a/src/tests/databases/core_1.db b/src/tests/databases/core_1.db new file mode 100644 index 0000000000000000000000000000000000000000..42f1fdf58fde2efc30fe0d46126aab53293d5a56 GIT binary patch literal 28672 zcmeI4Pj4GV6u@_F$BsjsC2dnTO-m*XAqhzwZ(P?&Xa#O+O4~Gr#L!kftR}mYWXb+9 zyX#O3acGL*iZ~$A55SEBp8z2QCyrcD?}!748*qa+YkR%6Vem4H|Cio;E;`2pU=(0#All#sc7ojZyT zd@4w%hQ!qBpz_RT@BuF*fCP{L5DH5(fn>K-(KT{yqg=XL+9Vsd%4Fls_4RquX4%W4 zJ`nO&adYjp;^xd!K0Djyg5CF+V^KrVwnM#e=dW*GyII`4OKz0zl9?mNG&?Z+V688a zUR)Fp^PLE&=Q;Xzs9jwU?**W)?ub7W)s@n##W&Z>WI8+D7C^n7;?*iF6aeXEen5>X zBjwT^px7V@QQV0o(i0Qn;d>oXw1(f;W8A$5+;q=bTGV8r9OT<`pxU;h!JKhDW`<)3 zdoY-_>+Kym3wk9=*LZ@iL=)+e5%F-}56xsA48^0dws5$F2g2Wb4Zy81RcCHk9Wa)s zd60hVeO3zsJ>N0ce1m!dTJy1kMO!e9@jw`dCnAaT)Rg$J9!8j0_&(YeR#?Ns3x5vB z5$62QVFq)iE2*8a9x%s+UxI+eQa(S+8#&b4KQfDr`QHR1*Uj5-bc~;8mx+n=*HRVU@InGe00|%gB!C2v01`j~NB{{Sfu~O3;q4D5*=uT z(j3Rut<@}Ch4WcLEsa=J)nL@=*5{VV>?%1&E4pg?P84+y24cG~rgQV;oVsV&P7e*Y zJF-oUS`a$xaAmOS(S4Xyb?oK{+q*aQe^7b0x@TLOqr-k*d$S{)Wv(4w>=ULI@2R?@ z@~m(cYmNIL7lEMVkJyfn*M!|W(>541W|!i9!rAeA9)!;hm{GPXWWDW%Y}nSqCex~x z#`Cq+Ov|Rfp~~0)qV%l*e|RAQB!C2v01`j~NB{{S0VIF~kN^@u0{>?MD`G-OE{r6v zRUF-t`B~udJ+`eFhY{baSblsj9B3P$-IUx7Oh4p3xMj zr^f;{PbMH`PF6lM1geFWs)oRyQ^zOg-4o|mvZOxncVas z7|S8!mf@?4+_;u66Q!#KNAEik| zW1=WU<8mUxZz6FyF2<6g6qkeZ|6lR^|H&j3y&wT3fCP{L5t-X5vk%*A#NObHwvUzPUeQ=y|s$2k!zc!!qq~NY~Cu7%{v<#3#3DHkVf4R zayMUGe>Y#8Th3eC|Wtt3pf5=@!HLN@d3F}ctGa*`ZPa0|IzwTEV;BK z9%Q=_P|tJpa-dzkB02~_UECFaAgU{cck*{ON@O-O+Yvy$o#It1ED!)0q`prLpOI4G zK2U6sgedMuV#(=g@!-R*DBA814e)!g!%g#?rA18^7=vu*7^t@GXfS77kD0+3f))&B z?RdL=XThL>(ltInSHiL6__%nmR}0N#9=PJsNJlulh6lp0gB)-xOx2ki6t@`5vpmRJ z>pfO&1$w?~uGJLk1JIt2T`W3+Y4`(S92^V9l4s6{PwHU=i3RV&9bp9}JiOp@FpePS z7l+xJGd+Xa9qT@GT=<`$#bP;|o#%}lYVGwM#m4+^f|2X$9XPuD=cCKSSaN1Yd@@!W 
z-N1Jta2x7?5)g1EfyY5eR9g#7UKrEXxOaXNdnfq-MUA61O!ck!8}GR3e} z%GZfK6(yVK8nZloyUMrGYTBfwO_qqt48u35LsZ+WP{%Vt*(KgC z-MW~8k~^Ov)Y6FMs|KS^uY6^d%wHs@Xhm1;nh{0a1xIWbd^)v2PN}ex*WJG(da7l=Gt-L);v(P8hfvsoWw=}XZeVJh{$2QIUHW|Zs- z+32_-o3^!BWSVbjobgr@WwExQ%Gdv*^rHa(upj{>fCP{L5AEURWKcy%%!h!^l z01`j~NB{{S0VIF~kN^^RF@f31AtAm#xx*}0_95S!rj})UwPdv7+PzU(2P&{a#mB7cQhDpm}MjZ3s^P^K`qeD*T|gN*&u!9`cZ!N4C0Gor4T-{L)NIzv9@M zukze|O}`d=UGr27G?&h_G&Ox+=V#>`Je5*mt8_XHI@z{0$kGF)nx#OWljEQzkHZ*2 z!aQBSD88?29LD4r?=&8?>8i)%R;r0mM#+Nzv8pIcct0xffLPfK0_PULUizsgSu zWZ|xlyYD`# z896C|*14mg#n1Ss8-Ui<)}{`DU?tgHpr?J+RTT=yAKa~VcqeEg0pi)omY7cn7#8Ma zdlkOjr*O!=_g% z9^zHYyejnN80=9slw@8ryl|GU|D)1pg7lB{m-I~fMfw&Vupj{>fCP{L5SEgRIca$;r;*T(r@@1055fD(Kr%70!RP}AOR$R1dsp{Kmter2_S+0M&Jk} i|HV|eo&6VnI(`JQ|4`RX|3e+m|Ho4`wOalks{RFX-u{aK literal 0 HcmV?d00001 diff --git a/src/tests/databases/core_3.db b/src/tests/databases/core_3.db new file mode 100644 index 0000000000000000000000000000000000000000..859ee74bf06ee718b4bf2fd8e9b8bef8e5aa3c60 GIT binary patch literal 28672 zcmeI4&2Jk;6u@_F$Hr;wZQ7=88bYQGQ4*3k-lVRRR1vtTDQ(jf9QR8|tR}mYWSjjk zyX&+T;y{A9a6uf9;2-FT1AhVsP!BzDLx_9DrQ(V>Al|IM)^=PWp-LdVQ62Bjd-L|a z-y2ujd9%Cu-n#BFvTZvi^~j=dTo6U!EkXoANWv!upGJ$qL8Ng4-(ujn*I`mf-@Tpy zX|(?Z|85Xm&jyhvMqqxIK``$Ss(z?OMRaj z6-G*hdqA;H5~8>liKWh+6AwP>h@$0wUk|@~JKQkOSz6R&fgEJpbD-L`qrseUJ!S@D z2wE_hwd3s`ISYCvO4oRTu7qQ$p&{{L&kxOH9=PJsNLx6(hX=yHdpY2inW{54C~h*A zCwY+m=si|#26~}muK5bJ1hnR32aC2~>i$3&2d6@@)Wn2%SPLUaEO;Dl3o9t$;RVlw zaRfQPILzjp=}Ky6tozJy;eUcAi-l}+`<>My{#1;pp(6XP1ew)cCk~ zIOu0L@LLERhT5P61e{6Wv=3jHt4GACtB!C2v01`j~NB{{S0VIF~{^tY^?|wQS73SwZ+k4k_JW-ipR1E46)i%r2@k~&5iMI>&<0~8a))){o zmux$>nZCoBV~iSo(?NG z4L>$-tz@9(E@TL`G-6d$gHfkjzPLoDSIBu<)>Yd#qNuyzi0y(;=V!@zb=R<+9uls1 zjb3Ki1(~99K^?ehwjkOfTUoS{VSB2ET zV z2xQsemW)2KE!(WZX6}o;EuD*lLT;p~P`jqG5QjJ^UYGh-bp4%=o>);9otYI@Q|E`wz^lHf~s(Qb-bxx zc5JPp@_N6o?+0JgJRbwi`H7~crtj-~e|?>&QYvhgUJrv#wq*?}`#{OJ6u9QdAh(oi z<*0tgS=iXj$Wv#yiJ6wr^6_-we=9~F|CN-h9-Q2KnF$aIq~+yh 
zats8Eqm6s_bVYSlg~HAVcXJIM7rIs`4ktika-^xjQx6GXPG;e?0T9V85mi;@@;p9d zx}#+lP7n79!t_a3-DM_itOdw&hS&eY(i1^?F8vB$*pL7cKmter2_OL^fCP{L5OsXyF%X#!g^C(N#;HMvv2tC=99h< zq(7wJrDxJp={q>Vh6IoR5#YwqbdQl;+cBv}7KPJ-C<^ZqA_zhZUJ-aTS{N=ujSuiH`mTFj#)Q=Uo7_;i z|7$@Sj*GFC0p-*;@BteVKmter2_OL^fCP{L5@-@Q62g&$EQ`l89^EmR%RW*Zwx`>c z;#Mr%s+iq9k+niTSIm=Q?&?OKbj!(9f3&+-(KWKZSG!qh@0J>6!4ee5yEqK2aF`0m1uzh79tohv*bxAG6j)QLV#_fLPi))z_4 z&x=QyP6X8R9DT<(uGSIm1)wJGh~F30wfuXzyBkF^nVxJ5pf*nNDkbI%fb>#7q(+&M zVtyMa)=5GX+o4F}(k1cela44_?)UZZySKv)^PHtcP3Fr%racF$Z95vw8P{W`KL)=A zgIRmt{)w}oSE6){C+J!*k{B8ij}EG#nal%MJQ`{XhxhP6_gXkp^Won_&ZRBTcBddt zzBUiUo(a;A@CO?bKmter2_OL^fCP{L5sZ23! zmGX6BS4Yt%y2dO|->vX%w5m2~YLj`QGQ%hv)FG;EmZ;;IpzIQFAL_@~Hgl~pAZ9Mv zb!;@c!*r)tK~spXL=bze6$hgrnl1%r%~4C;D1?BF!~ z*t)ZlhL*jYCe+f1RaOl~oo@N!5}95hW3;5JcGZZY?t>$?3qFm_k}-APu$>+fu6N>? z3bi0~)?vzE<&*DWQq{2=9=6}!)L$U-Y<1tZG)ITM!}eh(7)z~&`-F?h4?S?19WtY6 zm&iuj1=+N%xdPM5mc|)xHBc6+8>)Q$FG@cN@DCdjKmter2_OL^fCP{L53gXd-;O_xe>d=(6p5FmKcs0GfDH*C0VIF~ zkN^@u0!RP}AOR%s9}t)v?GvJFqkGI^J7p*sr>JGwUbUL7xOUl5Ssu#Tu>LuB4iv|S zT+gApmEsRF@CDKNKIolD*v=lcbk|Ik9pfh5*}XS=c28x`vCAcXQ{CFT_!_9pBs)|bw#%U6t+FW*#Ce~E z`eOlJ$E~hhNrgajb-rZ?9v|TKdRVBje2Y$y|6f!&d$~ExO5gY-^uwjsR@Pd=IMo7SC_JKCcBi9 zSC+Gohs@&K@{*jroL!tf10%C?*GS$0PULQHU*{(YGVofImq-G%-yUjd1EWJY2H{lW z;t8C-02;=ejny>$Z(i1^?F8vB`*pL7cKmter2_OL^fCP{L5r8h z5E$?NF@WJvC<>wzB#~f96s2HTjs*BcAS{Q)P*jw{a;^Rk&;LJ{p5ku+ywud9aU_5Q zkN^@u0!RP}AOR$R1dsp{Kmz}cz!@n1i^*WC`Y-%^{tQ(Ap|4f`hdy5ak0z^X)%rhl F{R4J7=f?m5 literal 0 HcmV?d00001 diff --git a/src/tests/databases/core_5.db b/src/tests/databases/core_5.db new file mode 100644 index 0000000000000000000000000000000000000000..b9f2a0d9e122d2e7f1a1d7f36549af2aa13e271d GIT binary patch literal 28672 zcmeI4&2Jk;6u@`=iPJjUuhebRkZG)tgeHzRj^iXK0yi~nXqtlKeyNAmWOtG**$=b3 z4z)OuAS7-`m5|_X-~#*w96$(hM~HjHrQ(J-^2VR*uU=54g5Ic(cjvu%e((3jm1f?o zx8K{;Jw|qI$D|&~2&V*56y7345QI3qqVQ_A2wa4kH}Ed{t_NMlh1C6bxS`0<7lL$Q zSd6cpR8D*eH`tH>5;uJNd%A5YZq>4_ 
zs@dBUT`v~$r2;ACuWc4cubfN|#d>=cT_YP?rNWIuk!;;5k*&L%o3o@tbC5>e5ppkI zT)&wwPA_EBGaV+_V~;r&H56^f_bJ@?yTy&$`Qk%ztMHIa_w{LJXy)Vfa5OPLFCJyP z5m3)_^c~;0dPigsfV#LVeqU7A3vcJ|ZkEVYda5IU`Z&d_mYFXAGD!W98Wl!Lg$F>f zK@y^P5Q-+QToI2x>WZT6et3Z2gB@;~=PWI1GG7j|ojFi#+tFaoxE?e8G59SQ%-Zwz z`_6(viPANmpzFbCVq`=-I;e$aG7ntwXs9C`-opdo?}Hq0%S_do>le2e%Tqka+UNsT zZ3Vj6HP>ni^#ru%V;75#U>g2F7)R#<(ZuAWcw7&|Pt1QE>!c7_jfsCCdci;el;1S8kgJ8*RQ&$G+KXd;;uk5AXK>-#SF zE(0AE=8|2dnQ{CHD;j>xmsecRF;9rg}8hxIX*T8o5*vGGS9xXccjQL@Wqv*Utn z+16ZehR6@ukml=A4;C|v-Cvzde|6#HvC<@AdL=xCjB98fGRd5fCP{L5W3PbH)ovw+?J}sYtL(K9h}V2#cmRP{{B>2~XU$t? zrlGhTTT8ZFDLb}SQTeffnjVi*QK`$6>_ika=OT*5^bK1 zC23)6J1tLN0AcoOTj+Xlwm{LS#}?8HnYnasu3>BWENH%&_i0iSir=jh4Y#f>=j3c| zIVG>ILPovhbq|3Loq;PpOpKfyiSgKsA$SF49vK)m0S=M<5{m zt|uqrAf9TAc}8l2%fld;8*Pnitn9gLd2#9TNs!9zVN_L_%TrJrd;Vfr5N6K0>OM1R zb1y)a(|r9OlztMVXVUNRh7AcI0VIF~kN^@u0!RP}AOR$R1dsp{c!>#|4u*yCSdeeC zWccF{7p?bC!9LP>EWo=Kcvs-7(_%|W<~>8Ruk-bPMEYEi{+9lfo=U$+-@pwvB!C2v z01`j~NB{{S0VIF~kN^@u0!ZM65*Uw!#K2lGkTmuFzXNbF3<`1}knH_2fD55e3`Bh- z(O^gvrC>yk2KYrFB1gnfOq3#Wz5Wl+|38y{!`}dSp{YgVNB{{S0VIF~kN^@u0!RP} sAOR$R1pXa?Gf?~&$Aj(azwqPvGf@49zIOQ^`gr|6HeOS!)&HUEA2>Y5Bme*a literal 0 HcmV?d00001 diff --git a/src/tests/databases/core_6.db b/src/tests/databases/core_6.db new file mode 100644 index 0000000000000000000000000000000000000000..87d41d4196920ae8355c184039989e41f9feac37 GIT binary patch literal 28672 zcmeI4&u<$=6vuaM$Hr-#ZQ7=88cL=ORgyM#ymlNrMHQITltM_saes-!YP>s1mh2C+ zKLQqTAVpk3LPCN+0VE_Y+&S>$z=<2;iVzpXl_PKbbN!18LMq~|>g?{k@65jUnQ^6= zH|yP3wsnt@J=-yeRK@^2&2oVG!0iPIrI!6>PBApvJi-GHLmkA+#?*-mabn;_C zIyWsQu1zT?K7kuNkN^@u0!RP}AOR$R1du?Nz_AdGC1qKBc-5mdgSqT=#bNupZ7FWU zvaNUoY{3)E{BM8^SWnFrzzM0KP5TeH^Q;x?5ucv*bL2N9=PJsNMAU-ga^XU;~a47Ox2kiGWs%M2S9m3dT8AThu|57fVC0s(566K2ymy%xOD-;o4`-Ua z8~82+E<=4#0tU_`a61l(YHNYP3w_${_x^9<@E{)})R?O}dfnrE_&1Sr=?k>qDTtHL z%|nS_1?g+}g9j2o0!RP}AOR$R1dsp{Kmter3H-+iJiPbbVpLdLd;bvDU3_M#OfhVg z@_FJ=LDeR@#w<_YYw%^XrkZrsWR0lIFnohLM77O2bvzTqF7XZ^e|%%7)T;w#=8`?f 
zHq&?c+K@}A!-(tG>$b}@;@PCe$nNcCGlotrk1VSPx}iDDBK96M$VlCwuBXEaPREbk z+t)I16xK6@S{ku@)nL>aw&#ju$++;8&T8)aKv`Or%S8kl6qj+&KL{VjjYq4 z7KF|QnlhL_`WYrw9lPUU|Km;lZ%m%89@v)V=&*L!Uu{HV>Fd!6;ljeJ9=Obom{GOs zWV`Qz?ATVS!ZhE~IOE+8%1U!VmCyf0=^Fw5;eiB@01`j~NB{{S0VIF~kN^@u0!RP} zJjw(%#h4IJ&Bkxl9o?i(L!qwA%$m{gnCoe5kNSqEwD(P{cGgmq=6V62|DT_#3W+OI z-z9!bRHr^lyqEYQaboJXsn4al^u6?f^qF)|lBe_2UroO~^-ki?>0hQR(r;1}D&c_y zkN^@u0!RP}AOR$R1dzc0p1{)lgb?4F-)9!9`H;{}Q_HfwX0};zZQoH@8Pd)$KRI&- zgp0GT=TO~B^A`>Hgyrl6lwMBS&OWtt*G&74@dB;Y*!f8)U%JpO=Xw(o*zlT#yAT3f zey(SuZ`zh^Hel!V20w}y;~*%^cLl9yD*KM@*ZD(rYyZMyAi26QAaU3pgN(GwmPIg^ z11+?-?Rx`{+So`(KzV(w*AQ&)=lOJ9RrpSBl{&Veyv9Rv-mulf#xkUE5v!%c@pPB`gd-M`7V3FP6ElZzw?>ZfLVY5;r$dmNl56yEzQ z=Rt8ZFk<<>1BF*`nWy;mp7ZjPC%JT{*VqVYMovngx_TN^e2;&+&1Z9S^TG)*fPRE~xCMJOsq#YkL~qH-($5AXl~F8zeR0r0PO zEjEq>kN^@u0!RP}AOR$R1dsp{KmthMpAk3($$xPn+{^w8-<~}M*?%bOrT?Lf=l|mi LO<6Pl4@G|gvq}Zl literal 0 HcmV?d00001 diff --git a/src/tests/databases/core_7.db b/src/tests/databases/core_7.db new file mode 100644 index 0000000000000000000000000000000000000000..531c17f31cac65c826ae92408f58ae9beaecd100 GIT binary patch literal 28672 zcmeI4&2Jk;6u@_F$F9>jOFy0TLzyIvlC+8AP3k5ot-wu9DNR#I+!DlLHQAjcOZLOe zuG1RS11SQIZ~@VO0VD+c1sveO#}&jsfH;7-a^S$5_1F5VCqNwDDAvx-d-HbQ?~NnP z%v*1~v}*c{Y`fg1K3R~Sl4MzWmJmsjQt(N@r(WVviPdl5Du?R5swpY+@(aRHy#GT< z85)#Rmj|@NAHxk6B!C2v01`j~NB{{S0VL2Ma3IAKX;qaUUGV9a#XR=9#@UYPI+|B; zT&H4pwj@^83;AM!6!X_s3#3y{ruvhet(s|&n`_0wjlw!vyHg}=qCDNm#^1+*JUNnc_*JIqh2V6HVSUS{Zp%~;^W1zb(H(<_qKC{C(NB|#0mLeFL2FfJ`BaDu@-kki*Q6-dkw%VG2LWdSlnQ&P6{W3 z-g~UlaP(r^TniMc0cg(0HWn?;)W!pD92}1%(i0Q%qpBNWV&QtU#jUVJxEH<;`w`~+ zWH%dgrX#5BzV0*b!GD4Vi}_q`T6E&n+3Om`+WfDBQLE}LINHW9qRZq&dVE}dbR>vw z=(`ZAMp~eR1cFKEwigoJHA0gYV%ms%>o>7;lJ`(Fn5T2IjJ$Vkbeo^QggOMM(0 zcP?k4EMCkK>KMc+>lUNDQ@*f7rZ1B*S~7JvFrw+ZFc8;+F^$cTF@4u^c@K%u>)NM6 z9dMnsnX*{9`!j5+bGJUi*2mlC6GXnN@4Ak`P1x^i?bgLu=H|hbK51g^Ro&#eh>Bpb zQmZ#&(P(JeBQ6i{+OT_PyB34a>}tGEIy>^J5AL&lW)b=w#DP`cd zfzK3M`BwQr`BW(?kV7 
zAGA)SUA{vd)3Y;WZoNRaw%DnDXrDaWXczi69EiiK7T!n%WVzv{j6QH3*RDYJ^(9f3 zMw6hhc)FobeWtd<-Ev7h)O2>v9s-pMBW)_2Z8JFG*4dN{;zGzmEp^{-d(@>%nHXqZ zoogC`6n}9(Z|a)J=GG~9E$ubo62I^2dzC3T#!X%wZ|Ik}Ym{|y*dOQz&Q~?Z5}-LZ z(aS9R|U|Xnl2_DeIoDQOKi!`3u7fYQrhenLORl5e`Aj7|*I_ z20=926xCMtYwUgzx}O;UshVzdq`J;LF)@T~_2j8ONt!<9>ATFP^}PU9%?A1Z%9oPz zi}C|pupj{>fCP{L5mvjEa4p1@Y!UZ98Mwc`Xe) zV6(}Hkj#f9k;g-@Z&mA}h6MdH=f(Q}kn&Jc{#5>eJ%I0(&z1L;ccB3b5n?Hf)oD$hzl3Q0mKb)LcHYLK zio07c6nB?0x%6^h7wm11ITke(t>PPncYb^K#!JQB*T{>d*T~XHpOz<<-`fgDlk4l^ zQEm_c^*l$f_}aAuk#PW8>Vf!uQC%xNUwmb|Ocv9NeF3!EDPE(-d;ySg=7-d%Gg2ec^Bk4}`yubHJ@JRcEeW-DNB<@*tb7 z@3BTV(2E0et!dCoKyN+{u;>e>?GJ=;bUF}CE-Z-0tuXw={P)4Wu>2Yxp8vBy4nODn zhuNJoLrEQs^^iF({1S9oWOBJ>-pHZWy^&dL&;Je>xnMA7IQiN< z82?m|zLx6nh8Ge*0!RP}AOR$R1dsp{Kmter2|RED$FIIW9}(8qKez{bC3R-0OfhVg z^0nQNpllOeW0t4yH~7|9)0%Xx$vRP)Vbl%k5Y;wo)bUI(c8OPoGv8}F#aIMs$+LN>>u9L@1yc;wQ5_Mqr-k*f3p#trEVNu4hswGudBMF@~m(c zTkZR9F1mtVf5dj0cum;7Gi`%GV|FDH7G}@C?m_tMkQrsWMz;HI$c}BT?J})yX*^$h zowV#WH&prhUzENT;14e(fCP{L5NFJUy3DK?4Uf5=#`bC5@RWAl#A;_>MrrOB@b&-9B%3Zs`Qogq4b$_OIl8>CB8^_lbM z`8HFfT-qx?6$G1n&!(c=E;Zpm8F=g!U)o7u%;YxYLi*ek?jgO0VIF~kN^@u0!RP} zAOR$R1dzZ3BybY;Z{}h_zLk@0K1(oTy)gw_TywDimu0x@YT(n8us78z!2Iw(7C0M%PUKOVXfPy-QZOP% z1Nxd;b4Bp8r4GlMSOo0!RP}AOR$R1dsp{Kmter2_S+0Jpq3HFP;zf e3IK%f&YXnve~|SG06;dyA5J;nG;0_!&$bFC8B|H4q$jFQ>W64dj zAyt;$U}4iREW^+=OAG9;X|pzF7-+hYY{2bqHp6HzFf+zx8JafRXE5#gnE$>P5%0Z- z$SjF2dW?DTRYktI@7{aP{qMQwo_o%H@1~Y!vsE){70Y?G8s(_FsID&R?a?SjQJ3)l z?fBpL+=GkW#t;1M@?4*Gd5OBdKHZ1A-H|VnJoiWbn{#nmfgn9VAV45MAV45MAV45M zAV45MAV45MAV45MAVA;_A}}(7%7zAq@qKUab5!Jok>475+wgmaetqbPa~~c2yTPNu z-tc<{etqDH(AW3>M*pLI`@MgQY{4f$AV45M;7%a${K+%WK966zNL{4j7gxg*iFleO zA6Ait>zuw_F6N7sV%}UI)y*8s35~xj&2lkXiLq>y5pL4rO+mhH7o?jx1VuIl(Tb^p zq{Mj3)MJXk3Mj-Wg2>T=s2N8m8#eh1)CDSjp-GteJ~f0BZioD$BC0CKnK6sy(6J(I#uQ7}W16UGqC(4t#EJkn)Dc`!;n~joasnfE0Jk-< zgpZQVT-!Z`SLGC5S&?P0tGVj+ zl1C8Q6I*!YZwjKzYkPK;l{wQg#F)WgobZxi#bk}saV1Eq$txnuF(;22Hm@ATQE{#* zw6eg8bZ1=AgRPvlCbjeF5!lwunkdVhA+RxBS0JRE!NxR0Gh&LQ8LFkwoNnqT*VImk 
z3bY`Low;NgT5036J+Ylr+Jk6Y9h}7J9ItaRgO}MDZ)#dhws<+FsirCE0xt_Pchd9h zFm-?I(k1FaDt>7-yq)3XrW#~;pvo$)BAQrEY;*GU62m)JZ45zgIeO5|X&I7bs+=5C zWJZnghJ*nwLnn%sVyLDiixw?c76@RM&Qs^9_<2VWDj2bh!jNut=})asq!RX58)ddJ zZ0*~eXoxy*aWR>e&~%k(6|^@d^C~UTro;*+nts*W^nvXRqxhT7@Gfb)K@ytIIae(V za5q`T+jKjpVu%{F1Y}q=AS^mst?8^1V`y2iI9@h5#yEN69$Q%2R2!|FF4AP+wX3$) z)HW9B28)g=Y|SsJ0Nmou7%lTO0S#i!(JX{U)ffh&kQ15bBX&=<&zKTK+cRawQ**2& z?z4@*r}NUxfw82kDjyRV7SwfJim4I@iD6aM;Gtd_LpgagY=hNd{GUD#v`$$V(!Qh? z*4Ud&3ryZAPZ2fF5@jQ1kfF~rG`OT`E{0y#XhW1}Nw7|?b=3Z3{GX;#$=LNVg&wlU zzb~X8zv)&F;j=w@d_ixR2^myD{_XS$_h*b?c`eDDZKKh^u9eK zw(9@(#CBdi640KV5d_8*7zUc(lwv#$BTEr^DO(Qjuhj zY5Q*rE6mNNrPs(QaF91qwql|{Ljq~&7L_L~sFo%%vMNcm>WR%?3}W+lVp$W^5g-sC z5FijB5FijB5FijB5FijB5FijB5Fqdsguq*t!OVrX!S_H3T~AVxUypoWq!f8NGBWbJ zBR?|oo{^1_>m%oee|z{N!-vD;!}kq+cIZck-ZL~g^w7CKI``w}o-uun zyFu!!<@5CSQ+?Eh3Z)nGCAF-Y6@A+*XZ7u@n%0ZCY`Rh{r+3o~o37L}a-GjGq9EX& zojr}H*z+$w(ofxcQla*B>qfz<+$fccMoq6~i-j9jE~}fnH_U<^0lcxVmb0pTqhKDS z4@|9s$2M;4qmmnHDSLyHXkIrpCZ@oz9b*^-JXH=M61ee6u#=N@lXL614tm!!$Irib z=?;;Hs~9d#QG&-BKDpOP{v`V1L;cjH7h+V|%+zxA)}4uhCu1p>EmUrx{jvOhDYmT^ zjN-mozOjv%ak>IX0Nf2H4B|_>UJs2dJ0yz;vX0iWIpcc0YLb9}NwrosE7vvc`4{iI zlMqR&_J$yO{>9-tNnSJ|Aa9_Do`13TPLh{+^^K8#uXcyXi-N&hECWxdf^a-<8gxuE zA+7@A?JE4q=yAS%c)a&rcZfV(wX`W4h^rbhT-K6mfxINgXpIp}URL0Tl{%1Ly+h<> zPUKY?VI2~5L`26x)I>xil2HZI)JPnXQ96)M-68S}%hHAz}+s$d8o@$8*g3%68W6`?t2 z>&ZUjolLn~Q)iZH?5=4o_2g|zk?jqP;4d|Qi|MB>oJ1NdzTUYX+&f#GNN~r`eC$j% zNRvrO+Rw&gx2~epQw=VBKzYmz?$(tvD`#Y9bA-Qg?pDujO7V7>vZd4uUw8Xb2pIZH zU4LN@rIs2E!Eh-Q%iF4EWUJXq+RT|5wNUZl^+M@3WSm_oelpiy$e`3jgG{xI>h)Sa z4co1n-N%IoH2Q^E)RYlu9&O6XjQGeMywbRRHT9AmjoQ5j9akADdaYmoGW%wH>K)FuC z*8rkQd`ba@2o>hW33qx4pYm}_gibF38urdJyXnmmySFLO=)qS%lZ1?nL zdq#Ytz@=wVfGCr)>BEU6*uZe|)dggkv9oj$O9-kONfs|eCX~WB9g1Hesy?RFWDE3D6H_q%v&uDBG=s$~sM?p3V^xdXFLpL@H z^qyJ39aN11PjsPx1CgfcRV+y{=U}Mja;Ckq#3i!U`-vgz|3`Y3smPBizqe`Ei(zE}Fn zec|4J+q>EGWmFP;0tEiaBXDx%tpgONW49ayH%&SNQzma3*@{m56Gs#F&uGe2%ldXS zTQJN+vl7h}RU=!-M61PUa&#(MvG2OhpmeFG!5w6KNM?~WYSi*26MxjI8ns}QL_x*W 
zh#!NvhN8tny`nl9vyjOa%;?or$<(uE3igyrqtxr1E5!>$6E))l%K>f;`bhyHQaqn=>n38fBBX(}=Ij zli5MQ$sAgKBt*rq>&shrwXEv9o&qc6_id}Sb4BBeQ=!bI5cSkk%!Id2tz0~)nB}z1 zqNnarC%psfMOcRwPfnYwXuTdge`p~Zq8@)-;kueGy|kTct>NK|p22!cCc=4-@oD z-mT*+i#=0s2~qc67G|h@v$U36bTXOM%6tF~Z}O59&|R=YYf3@T=I=R%0O z{Iv2+7qMe<#SC0})zW1BI*KYAbkC5zF$^)#Frt;Cg5Jo#l3I&4>U0?F&%YVC+)#=x zEi#y!#K;gO37x^P2XnJRdpO%;H{tk*p(e`va05rL&J_!p;@If5{&zeG(dU>smmFEM z)T(BaqKk`Qj^lv{9q8`15)|5>vV^-6I^=11qN_fS% z_Ep*(sdhsu=3dP#URkzJ-ISQY+zFVo~9i3FKN-dR!vgbq8!yHrTYUHS-n#B|K zLVmU2YXLE0pne>JO)3uMS_w#+(Y#qz38d$&`!V7ehyxK1M@!lQPE;7A#TjoEGL||w z4&ymnfgwd;a|@9C(a~&X6pGlEqoL}37>F~>s)vqmAl6ZDjKa~;W`*r?Xg7AT84hR? z!kF<_b8hY45Os-Bl3ms9qFS+gY=vA!>oQ|^T<3<_y;h$~oPldg-^%1dJT>&pJrD(n zS*X`bvQ?aliBLo>IP9Hv`|7s&w#QC01DVzAB5vj9*6v2{$Cy+X24>o>x~@r}2~Mlk zo1;nGp7}^8+LeT#x+_GTf0W_6G%UWWqvQ+Zi@6SxiV!`>R=1;eEu^}E%@GXXBsfGJ zBxf_)&?q@xk3M)5TnFy!xrx9xl;XhrFdENcA~?k8tt1a2vKZG<3_7?g$hZW_dUz>~ zOEOh8Gr85k8p#%RG13Q&AxzuYz#Nusli9W=H8pBY(ge56ziG0vtbPSN0|dpwRRtm z6LZ6nmi7wC%_x!H$`er&{f0501@|_LVBa%ifef@-iC8J!+CYf9!TH90T`4!=J*y8k z&|5@|iAHw2p^29bb$2!tqBu!fZ^X*HajLq#fuv}>DGlXjOwBWIdUCc!-gCE z7z7G4L)9mytXUz_ZFls3bJ9R6X(UoT^?UQ#YAUOHC3K<>U44yVx+>enay4zw?1gHI z{D{_nbdXWsecR^kS?Uc@7a169@WL5Hv|R%vSx;&2TsKdnln3sfr5hN56ir|t^nq}$_i#F0pXJj!bh^<|;nv-R{!n(wf^?z67S19}oJ^=y&0s#U60s#U60s#U6 z0s#U60s#U60s#U60{=J=xPjpRg-4%CI=jZ_))u4n%k65lRJnQMh6bnj_4=mr>&0@0 ztpCpqeu|1LjeL4UAO3^k;&9*4|2z~u_la{)BRTj42m}ZO2m}ZO2m}ZO2m}ZO2z;nfER9!NisUM|)O#sLnunr9QfhYF(ghYC{hxp^A<-bF^Co0&C6 z#_PEx6|cY}FT<4a>=Jg8;N5sUUOg%pmZ7UU0{m*WaME+~#1PffedYYr<;$Qs@Xvns z-my`2*DPPDM2}2tKS;5%oJe_}9*DY64@Ajb9XurC9JYz$)th$NdQsKHYd3iT@498l zV`D7M;7zg`-X4bc#OjhE)2w7TY;2@R!_QNfFP~qfx-Y-)^7|%hd9^_H1V!r)Lz0H# zi7S?oIK{rn7#!XxhzG({%QEq3j)K|+yrdGZ3DfYJNL|wevF8sDCP~eYUb}&sFAm)O zn|J@_XxYT}kArP&Q%G!MTVchFrtm_#LT07@I7PJaIECFVUQ!rN6xK<8882cKXp2Ex z7?70lGDcNbg&4!>c%i4t3yLM`wH%(sP|G_b1lJh#^kr(58lb-EkKgpmb83!6TB9WB zOdfw-Y(8KSbsw;Za$}O+sWb!F6iJsw$zXWAM^VsuG*d7DqKLQ0;pMEBVBxVOp5G4J 
z@LeSET^byCwQ{j!w!=u{u_cbLzeS(;W&M#gi78t;;QuHPn`XdyeH=Cv%yV!z5d9)ZC=hv-<=*}>UHT#`jb z6nUGtELj>)TXGEVQYLLy$Rl;S86z(@##`kqOH&z_c(KnW{&)-VWW}L8pUsdx>KzHP zJYy5YD}M!G&c}39h4j#PjUrxOiWg+!;YCJdX`K@_m*8`~cc!%xZ4+l?j0fn5m;neN zL3rd?6YvD7DC@GO)PjnH%mAJjjk3u1YKJ}4qHt9YZj{PaQY;*<>Ka&4Z z=h5)%(YXooY7WtLgNE5;g^kKs{(cABI0R{}O(4M@)a z>xuj^>Crbwz7+X$=kgy_2M5ds2m}ZO2m}ZO2m}ZO2m}ZO2m}ZO2m}ZO2m}at5P0z9 z^DjJ$-w&Mp{znh-_x!Dw-%8g1cSpX9DzZ&^uhrB|NAcl4Ea;rrolW%2#c`;N)}|3QkQMka@up_##-4gcu?6S})!>DfWz zFKJfhE88#$T{NLz*?S_~tktzGwSbbMhsIW;j)s%_TVEORZooE%RsC+8E%RI_I1 zuFHcb_rg5dHk*VPB;u(=d@Okz=IXemd5Oij$Rg`y&A!)YMFZeY+}7`Pw1 zW=hsbv#QJ+hOUvp(@bw(07UPg@ud5Nxse{d+PH#5brk25)q&9YYuCD7zz#Re0(h0I zTF6%N=}fs;EBSwgeS+-2N2J)Fa+(PHdZR8QKCxy6!9OABMZ=vgc&Gn;ptljeHhArY zi=oi@%a^-uz1waV4(Y+UOiru3*8{#bdGC-Gd4IpI_Ao7TnXh4OGhO?nWlOz%W+&;4 z+e)N?!<))k`}C^j2qEy1YJ3Q_Gz9qEQu;&ZFJA0=U((m_4JboCTpFpqUOr_!SJlHJ zus%8suZn$mX(inuA7LAZQRRz;^x42U?>uTYxuFy)I2E-}&01N#y*#N~%N8qYzLc{k zcXt@K^qHp?%oNzK34_7MP6OcvD(N$g9f#MnPwX2EdljBnAGh^ilN3hr;I)(4K3le5 zc*gF#`fH%mwaG!dhd{aB)L!PBVThx-aj1uQrmjBR?as2rS+aqB*x%hDGUi zO4FunK;T2(H#Az2^@(EJ)X;8+befd?OhY=z{BU1)=={SEcilR$Td7v7<*bG?Rht(* zzP2@!iL5n$ye>g&6Q$co+F7#XgG%xXa2cnlX3JP0-Oi5j$)2|@WGo6@&b089153+U zblrYX>UV8S>er`1QqwD~T3NIOy@U1IVxr~Deb3tNOaW=bx>O9r zOT`N-oA($-u}!>pTg%qp&*FW3)ZiE1x|3eHQtk917LT)vsC_#tff} zQoTLjMTM{Q-`(?F!=J^^zoGu@E3lLA982~lhI7&G@4s_Z1~ml;+-U^fTOAIam!z(5 z2-zOhW+cZBu%`27UNg%b6Yui*T03STj@OPiPV>TcbTOW`5ujP~DUrVBeWxj>_KFmLhhGTwX|sU#4YNV9nRTBcbQnw9}~C!&gU+2Cu#E-l5R>2OsQu z$%*1P0bu7l;u9<AGMWnBWP20?8BCy!@hCSa+3_0da=_4 zwjq7vvkP$qS3SEV8wG8tBl{VBcm46fR+%L`pK4{Yqp%qxI!%E$$oQg$Ov`9FYac-C zX2+J0xi8km*}~^j?G5_v%De?kd+P#VOiMaz37n)7>1^b&8EkOr6^F(S0R@yqBwde8XP78uAFgz{Ub6aCWhi?Y)X7^1Y%^TP; z*(ixt-QL)1XQ`1w3_X)oEt63QLyhFd-0d5E#o;aX2+`qAM|$jWWVbTD-|~ z*J=Ah{Y>vRo*_xs&!J9dbd|4|3K5t9Lotf*n7%h;;Hi&9v+}B*i1+~4jz6ZOb87oUZZ}v;ynaB?H&T& zU_%BQWEXp_kgXn-MQ{~kok3gYc#CDMf2ghi9Efc+yW;s1Y!LKFf@;w?8Jsg? 
z22D;R7d1$NCbBV(p-ov)WSmxxm_tiXa7qJ&pA5H20?Q}@Lo;i(gZlgX+NeVgMFpq7J95}U{W+ab z?`n(!(_&>7`#ocd%AoBUSWr2hkI@#b3xZ+_ysEXdoxdrFb?UqQorx=_5r5W49OrEF zI9A(=L0UmvRi&6ljvAK)31_y;EGzM?z=L>W2pa9UGqNn;+~`wUzUd>5^U`@0v96fF z5Fij0daB~b9!rt)*;&i>`YN$iYF>qzEGx7a3hxp}(iSl{S9(N+N z&Fdd`{7t?w?w(C~iBI4O#)YvYU)NS#UCc6NeVdaHryaUC?D2ly8Sm;FQ9Arhwjt^Z zlh_ts&FZy$TE&~jv-`EYJIramIn1*MnTme&utWKAr^Or&8`n(@N4xVl!TWWoJMJdS z*5SVDYjK*B>WwB{ghS$K#oK7W@0+dofkD-(+#Ut z#<`nxgY-S^Vn&8Ht(+oeUpM3smu$oxH7}AQ?O(Ts1#za{?#KvUkzUCcjVv&?)SRA0 zsMAFq^MIpaYoA^V{g)Yz4f8f2H9!q zrQb&!N<>n1Mu>@=#6i6nLQJtVDF(s}rwW*T+h%wWZ}fZrDY{LTI*;(@`h3LED$E`r z$s1%+k)YNzQ<7t}#t0@aD-vAK?JaM>-glZ-KMnR?A8|!jRTkz!%wTjm#_N`jmRm-Q zH5pFBOs5DMnHIdd4J~izw%$`r8?J?T)3k|n5$fUaGaiv=tT&?En45f~)A11Bwpq^V z+gUZO7jxM(9vMsTrWrO}scGaopJ7Bn@F*Uosdz911exDw$@>59$Rri{8@&1Nb2tU? zS0kT@{4C!1_u3Bpq2nmf#a05FijB5FijB5FijB5FijB5FijB5FijB z@J}9r@X1a;HPF@7H8T9Rfs-t*!(Cm&*9K2EaM|6}Mg7R|1A`~)^&dYxd_H_K<`kG5 z?j1OZ+t=aLaQNhsorj`+Bpg0@xPJY^Lj!Ym4l30>kZ`Uhdj^J_tC7BeuyYme9tb&C zq3(fx=c>PZpwGGL>mKNJu6n!syKof_Q{?^sBa!bR{r9yv|L-p%KNI;6co)F0ME(Tc zfZvV$1m67rGR^|9GyIe9OMkV4 zKm1EClQ;cyFMS(%`M>nii$nOseBh_wg+KHMK6aTD?27y?zNxo6f5>I<2@nVn2oMMm z2oMMm2oMMm2oMMm2oMMm2oMMm_^LtR=~v%D_Wx7F!4P}`1OfyC1OfyC1OfyC1OfyC z1OfyC1OfyC1Of!UY7rpo|A$7tK%W2qv&bJtelPNykxxZl!BYS~7Wqizdn4Zw`5@i_ z@LXgsl8L-CvKpC*j7F3Q9k~*@7`Z<(6zLiH5()>O0D%C30D%C30D%C30D%C30D%C3 z0D%C30D-S81O^`JBah=#pXwjD=v;lIH}n=p_>KlkSSHIjB8o^ap*IHj_*uI$V3k}&95BG)6*%w27{e!p&clCwBcEV;)Xu!S* z_k=?B#k;#h{q}|49qO|$LfxTW`=Y-))PsxeF!j;?Zjuu2>+iDv;$?^w)f>7(4S((6 zcMtp^E`m>hK!8AiK!8AiK!894fft^?Cv^VWwXPRA`~0_xxmPm_x|vpUS#n68_t%?z z$0zykk+YNhDW{!))I55xnf;WrkZca3Yw_j8WPJH5Ibq3nCS2ROgB^|mY?83+BBvL% zoWF-th$?EjjV1pHNq`}*<&M(L+Q=zLo+BvRi(hR^>U?t2;I&(oyKVU2f60cwejZ@` zm^lBh`+V^G?|BZZbdQ*895CvGt;1n?-gEfs)t=HNx2fBAdX=BS4%oM3Xi`pdCNCH| z&66{t>Z#3}zMg5O`ntnOjb3%{wsd;^Mx!x}zc`&r(^b3*-L%hMtiMe@-MHQO+j2^k zo9ttvrF8VFC*R<;7azVWbe`k7zQH-Utj>`4{M!ym_jc-kZyApec#}^Xh;QiR+a{PD zh04e6>y{nTEgi)1w6hyKNmoldI}GnkCSMn#znAFT_Gnuoql84cSSxj+;BeTYF}!#+ 
z1=Y>DFdrRD&L(jPW+I+S#K)2?0$itA7bzcAkC=H?)LG)_X<{mgQ#n^>XCa|aM{K?H zjfz(<)z4n5vtR#pPbax-p3>GlM7N%DhBUSw;_1~(w<&$Lc)Fl8q?Z8NNuFFXt)k>_ zbMx#vC=J?FG#u0N4}PRwix9Lh)I=c7G6qgH5a>ImK0N}`Ssq(hap6ug3e8!vq!V6R zWU#4N|N9lHhe20@gZ=*v6EFB1AP^uBAP^uBAP^uBAP^uBAP^uBAP^uBAn?_T09pTE zqF$mR-xN7N@_i$Z5C7ZYr-y!dD0S|4&t(QbJNS<9YvI(ue;#-y^!ZS+|4aS1`iJ|z zqwiAhzwQ-!exYZ!`?tE^)%E{$Rl6uu@!$O^-Fo@05h@Z1_gtFm>Y5vf4Nx3+*UT%a z1)R7@g2;(1FVmJRs;b3Ek)F-H-NO22R$JXkC3tPTIDL>> ziR&9PqY1v2Gq}T2Ih$9@N6lacXkKoi$+9Dr?{R5ztcas`bsT++6ERIm7gfVzcv_Zq zSrlbijC5B^^GC7C-Ndn&*b`Z^cC?UNpEqjz3bS21$VjDDn&n$BKNg|xzT4AIhG%&8 z$*4<})hu$ft|n_dPa4N-G!9s^K=qMUsw~4k@{xX*s>~RYYM3(3iY7-ZiU9@$ z95oJ3z>mpChW3i9wVJfBHNBGFF;@2uG`N#@V@LbN$qg_Gc%>7MYS?JGrwLM z&8lJts?k=eG($i5@3&p5k_CF4B6Eg_kD^&PSypC6(SlS9=;p{^VL!IbY?KcZT46tT z6wl2TM2215kawp?^Ce5kw^A+Mdimj|1c&WW!!Y4rOu009(U3((S2c;{c}5pGK~^|U z(Kyi*d0y2c)7HGejJuT#x#^$!SWBPH17B{Q8 z^}QOM+aHf_t+i8qyp^iNNWEX&R$Z#9%quw3RbUJeqZK{O>a4-?tip1N!kgN&ch^dL ziS0yjH8xQ-(jqgJH4>FvAy$_4!_}2>bFrQ3+ghoj4SibmnoE_1f zOf5^0SxqrbS)!l4E4fo%Eu~XzF=4P;I%VvcTjs*%>TWWjh+=kayPfK_R;mIc^xv1b z-=%8NoTwQPYaHZCOM)Wms;F7$T!t5Ti58!Y#48*7>$6OZ%?hh&t#%-$3)!`)!!hxA zMT^%`Z30~R(A8F|&`bU5Gw*Y$s=8(i@OKhh3_SWI%isO%XX=H)K=d zBq#?CoSaXR>pI+-JS69LG-Je!Nf zFra6qDmrjrc=REcDkET^NQmvShQVke$ElXATRf+zrb6qc{_Ht+ zes?@CPtM5l+@3kVo5<`)@(ctyzL6csx#ks+6uiC`qH;wcd070#4xPENH~64 z!qLa7#+W!0S%M4$X;FT5a8cajD~GAwacgF)w3+^kX6~O&COW4G}%hE^dZbQ zn9T{*hF%)^txtFqKW0{0H5pCSb(1%BUK4c-K;sl{lapjde3soTn5CIHX<>J?xKN(S z?@wwIdzsprHLj@nL~5m#C~?p60EOK!sjLosz& z7qW;ms2R-g7{i=K(}JerxO+~x%FZU&=k%<(HDM{KLw>E4G}lvO`s(cJ+T=u@KIp*2 zc4sE|CKI^o{*ym-nGjW7wq!=oEZLNKLDM*zW6%#s!$@P;tLsObiP;3db~wGSjUGdX z9E`4y3!}zTv6@|7Ge$cwk?G6?*JJ`$-JgGl%Y>n6qA8oYNo#^(vWlS!qDV6$!vP+h zRj+C$mtJ1VtGazdO;<)j)jzBAr|39B;`Y?BFG4QzQvI>!Qd1;&7E zVSY3$(==IzwhTkkpwv|TYR#CNUP{hP9jz~#^5(=kGj>oBrzUak^}M#bsk9Ar$b{LM z38u*euDbr^fAAoI1JQX-v;Y;b;vLimqQ(Hw1n$wVc?}cIf0%vz49y$7 z1!{(2K=wF+!(?qJlB$Y^dG+r2&iY1WN{a8U%y7cSx>Cv*+^V)yN^o;=b3b?7PIax7 
zsy$==WZb1{>59gv=u)CyR2dewAZi>hYqUr!f+=3TYh65;El!Ou6!-~A-zk<37Y{e* zciEht+}oaA8rRyXuC`JoGv-@PJP4bD$P22Z@aR_K8&iQk#ZQsHI>bP+jviHJi(}(U zliTV}vA7anJzCwJ&u+=H)5#80ms+Wk8FMJiyHpjJ?+VQ;Dr;D*Ea;MGSsDyoLxWO< z&2aVH#zvM+FEF{ISR%2IL4_8I8q=k#)n=cvazOUWcm20+dem3`f4jl`}u$B6)N&lHmc&Yo**Cwenn`O% z-7`~VZe!zcx>8BUkL6u!Gnt*9SdF&=YjbsQ7FV>=!WBsf4d4AWE>|jyY7Mrv1ioNi z)240mT83m9sz~!vpS7aL=4z?ssj->Oz1emCczjzLQ&P*^su^FBs);tPBEV|j=E^mZ ze9+KxD_2N(=uykV6ijYG)&xZtX%W+t&hspMGtlE;lb6lDwb{KKf0W$XoXSn+kIl)Y z*?exTd{C^VrSj<5VXciR(t))zp^{peLPGa19{st?l^}35EGAx*F)|n zu$m~aeUrvia&}_Y$crnB2a8&H(pt{d(ph$1s|W}3Seuu|?!fAqT#2n*At7ArDtfqL zpcy1sks60Z8oXDmY4WsS(Xx)|ju-p(x2g+8c2OD6j+z-Yy&=!0a%uW-hd1)mTlu+4 zN31Gmas^Y@-=j$A`qy8`yIe`EV6cn~7ZcVK478TQ(xN5Fn1mrzGSindl6$L@^V}pY zBxB3DRDM0Hq>fT~d3sc#H;(5!a#cQ)D_FdKu8=_ez(fDs<%)xkLKh`bwrCno11zEi z%VL0@2!*a2Oy88WFtfj&FiNS(-NWtl?AAV~86{1q%<3CyZmQh|CUUiRCRZ?l{ahiT z>)^Tn=5ht!pJG8Lm?Eb!SfTK$3EP;M3$&)1s?wL3%dqPLUt8N5EzGVTasp1&TbzcZRIo7jbdkYRXmd`7|DLFkkI{!zcgL0I1RJD z0ke_OEmJb!n&lOdhEc2Q0$J4d<#yMF9dXmFi4(KpVXd;Yl-S>sX7(0h+{}cz&9%za z!L3ifJ3`%mKk<}yJ09V+si`UQf8g7G!zGNRf(jI%9%aRXo0L;Ig@bfs-ZrmI^*+4hwik|Q4ceEn6jg%BgU#4FtSiod%gvN!m7`wkKaTB9XV#0V zzC5}nNyqwJW@}zarfb!*ahzYOwAopEw?2K`a3eOLUK{vNJ1$X!m1LNY9Bfxz#-v1J z7O+@akt_uaa0>NW-|Bo?iydUKCaP94<9kc&;e18gsH7)0mzJh9v&~P7{yl2x-??nH zNd9*%+}k8ANsP`Zx&^PlhDEx>Tk!issT1AG>lUL?uiZUXUdXBI$xS6YI=WPj$M?1J z@kV7Scf7T{#!d@Lt7rjc*bI&nzTV!cjj4_PpIU5^g+o|11x__34bE_EUo{!{*?34d zXKIS7>eOrDZH=kuJ4RxCR#q0P@!e%QzBZ}EjQL4nnI5mSX-bfVQQ#-LHnn#CT^C)l z63@t3jlimt;C?qO^eg5uU1K>>g0lj>+q<-pTAeFyt*29|N+L0@q!0AiVZNr#Ku8MQ zQrqeX(u`T(C(Rf``1F4IQ!Zgl?7S+$`!8Zf03}AzWLD>7EFe@&R|54~PaHGmltGX0 zj84U>OGm5xmYPT!hs(!H+vb?LU+4fcW_EvjL3U{S>s_+295@Dn6&VvZ*7>G_wYJW{ zlGd>nrVW;Qt$Y7K-dV}6390zv*d)J_oZ8x+pE$}L&sUb_`1wRz@0MnQ4bAWDOje6aY-ZCqVwcx;<6EOMtIMO=HR-Svlebc>fFroXH5L(d zx!&058hNKDqGez)Y7(axFTumf!%Hq{fGiRdf*6Zeg$c8qWs7Tz;_`lLTMuu&%tWa3=fmC2>0z!qHXpaY zgwHl&xB@~S#f+pPNt@|rbp*j7FHH#q=~}T z{`6+nJeZlGTWMEsy-eGJjWtI@e_$YY4Spfnq)S{3q6%ygj>la(FmV 
zSz74S;_Iy~24(7xFMAfKiYb}UQg}Lu(G3m5T44;xvy7k~$3V!a&vZ+>!Z}f)|9QrSf)Pzdr=flx6qT8H8*%xN@1DT%12%nLjM-F4Yn%nKsuY;=+i9_+rt5aNpN^ z`c}i-2Nw&gVbaxfGKk=-!*ZO4?;KXY73mXKC-)0`m80a$*ve%5cw8Lcq8BDJV{2>X zk{I7=6KBvwB*aIP=kHnaOu?L{!8>Ro{)`wRqLdi83Xiov>?IHo4S{ay({Lj+d2~s;uVQ)CF1pznS_371o2;VMWL^l4{Ykszwm6E^*Whc9Iv|o zXnU>JlF;?-pM1s@B3?FN%t)Ax3^>d%a5Ns11SeaVlr&5Zp;<0dn%^|C;_`TMu999n z-YuuMCW^77RI;*Tyh!VkqDTl1VJgOM z1XfZlUY4}bxKg6^V|H?BC%axgo*18*-qMcaN5a8wVTIueZ6TNSVe~1vXSQYh2tosEi*U?=7;4#7h3~ z;84v8@$Ay!^oG#pskS?C^GvAhrJ9#1629pD&MB8G&af9{h?OB?WdbmoWetSYbp~rS z(cnXyg}gd5U)xyNU63Z+f(FR{O(xvs}#`+RHE$>v;#t&u;(>P9K zm2`2gjj6l9)I)7hd6;TA;hId5gsuzLyIiiY*9S2{T4Grk4W^D*25cuRbFtD?MT84` z7ubop#A4Q3H8x7@QfYQCdsx|9pItYmQ?rZ9Mw^~T2Y&8j7tiFX?u2u>LPGb4zOU?Z zg*Yw37c>pWrE2mBy~EXs9S1U;(HM)S(krhWRoAmK`_fS&U71;1ESIMAxw-NA^mbx% zRWaH;!8TWKIg_ip7tZAh30=<}{ejCBtH5B_3{8R^FVQ9qQ(n}dtW+M$HjCzau|UgD zO~w<2+)i1E@9uL6Wv#ZMO|GQIYMZfgxnmDrIFqZo7tZAh2|Z7L^jBQ2Buf)%n0(My zSV;*8w9$e@9B`ruEoQO38EdbsE{=-)rn-(9bGEj4Jb$FE?HzBXcc+g>JNMw5&*ZA^ zg>$(=0`AgrbqN|QSbn_c6M zSEoyCej&Mhj0Jt$YTE9>2hZfH?uB!?LPF2%FOIleVS5th7gpf}Y#KtJVYeIhpDDVG z;5^TpT<<}}$W>5*bTB?OTV0#ukC^<@N^yo;uO#S0R_NG+51h%Bt=heVlBo7y{p-sv zSJ-R9)4HL;xr3<~suNNUmycq>&xBndOmErL_YPN=vzyD>qG%~cd!oK8m)0k_qG&8` z)H-?LKKHTnXL42d!ns%>f%=B1;BuuivW^HbHitm4EJn~}g_ks0gl|N$c*g8iIDKP3 zDIaB~lFaPEk%{$(=LeKe~K@V3V92M}+&=Q6o z_94**7C8zF&lqfEh39*76Px*^t$2A<#VB0g+^$)9Zu5A(#EUzHWRh!JOhc}I|JD9B zt|sFP^FGzyk}$CNQ!lz)u>zS};Mj%UWGpPjuw4^TN!o<9B^aXCpEIZDDibjkfyl#b zda0b5FXk7gx%Hh=woqY{J00Ze_h0Qhiz`RBd$=MAJ@5K<#pO!E_7g^jrGPCCa6urT zXF%}b%7gO(o-VOJQQl#4`^N4xzmm(-OL8Ka*`3%D_w$F~yI5+or){iy&*aM1?Ov{s z(DkRE{brXd4GK%ghD(u=;qFqfWLDsEHceW>lE##c{(NeBbLIdJu+;w2gmtV;&K%C~ zk1aB5>)F%{cib_S@oLYRT-mza%N2>Rq`%|8x?It+iU=drfdPRMu{%H^GK{4Nt-}u~ z)BW)x;-9filg=Mb93&6+kCR30x!bA86NuYOtl6;#yU*mxvD#fhBW8TxGe2?FM_Ed30{0vbmZ|8FO2!3u*?Ef0SNYULBwA z$W_;wTsc;|hbtuXfB46K$-@-~=aphXvU!!baY!6eQsF{m3@}9-{rtAPF>#!j7Sq%F z#r?ereRhkVoH&lH&n@T$>!6cn`2AO@Gr4lCb`Mub=+0OE#KV=Ove2Ilj22!O5lY6k 
z8aNtF?C=sTSTz0YZZ5OOZN?_bi*vJ!la&c;*Gx*exmtWjE2xJZd+?R7JCiG0w|nIZ z30?mt>j`oiFoJnFNK6L%ouE%i%m;fzu-BW0FWr>-&Eop@)(X8dH$R$E;KNB}Xm)aU z-DFp$3|A9aMGZ!m{?19l(xCa$1bVLh59}W8(Esn)1 z5fT3Wg97I2iOv0^(tc(v!_6P$GqJfT@o>+|t(b>GTP%ZUhF1{HXf@kCOgLt{KcT-G zKId{pwuPGlq+AjW1CeP=%qAv7hQ@|dY!_4e3p2vLkVskF^2&Z{hdU~iDs1^+OPuX-gEAc&!q-`ZtzO@TL!*7kO}=-XsiFT{X2bs(RbX} z-TRHb_w;-h3Iv}3fqxPRR8Icz{pp6^v;J5LALrvwsJGtj4osL^*sOtY1s3`Y_RZ1I zuRQz?*x8TyNiY@$myVf?F}`f@IdN$N!73du04ZQ)nb8UrQCXr}=OIG(oh@{^F>dU! zAN{r;a_PeS#STf>!MZH)D(nSWA)8nc9%B&G5Tu20C6*{G9kKku#^%@}ovbXc?qsFv z{?x8v6~@`U_6QZ|zN3}yC^yQySbf!{D zpoCB>mgO`JzXev~@4o(S5zrER$xXqr!76RD~g?P zhLg@mm&MNUxBm8Xo(+qbnPhaSV5*>m7jMBr!0ia#oA^Zn}o7$E-%tvxZ(VCN7>OEzaV%NzKs1ySm409CL_WP((p&ZM+r|e z@iMdOLf@`wE+3W;M-Ph?r8o+&+b%Dym5*i_cBXnfHQ&Bv5p>~c^V5y9@$26>_zt&! zX{(=p9Rx1>86U;1=onPGIQoD35doVRO!ygyg9&0EAXLloPeFJ=) zi9ed1@R+ZRiT#S$&?O;|%`=RQ)g&x2Y@I-`2JQi4!C+Rh#^S+V-rABRA(bqwo6Oiz zVo{%&FKw2_+n&vVneq9bMxp#!YM0&AIt)Nf{eETv*)6wmk+$oX$UwP=AYeaMER> zzF6}o-1BwwZWj@E38Hvdd03MnFWJk48QJu-twfXryLEqR?a;c?K zac^y^ZD*=-9)xtzMzL1#7&8_C!dytGkXXBvPKkwzrS*m%sLIU;mFStx$ z@v4$XB~?rSH2l08mPj=ASIY*T48gv$@Wg!exH^}bDQ*a{#R=wkb3d7+_l}AOx~%R_ zaqVku!j!Xb@HD2Jm70esB;51nFS<-&BNY5KEOtOa{o|U&+DL+5m&Ser#M_zh{CEaF zp6Rvqt$cCGG^dwi#rdOAVQEE9WW@AAtfN%5ZyWR? 
z7GgaTYzC9VE*eNZ9$+KUpl~*`uI@tOgVkW8oUa#wx-d zc4IIQC`rSH1q*IxNet~_zt6IcnU)Ww9Jye*Eh7hl+Tt%X|T9$?l0^gE!WhQy~07Il`FFTe;f6KROEk) z%#ZwEBkvzs8o6iqUk$GfUmW_qp$`wO4~5Tt_+0T^-{6M^uZCXZ~AS%6MpS`z*D0L}$_W<6Z zXlk`=&bVHws@2N%f}U3Q&9a&?-S-aX%tEHR9e0wWg<>J57maKo6K%YiIO^n>Gu1-< zGFr5vYA)BzQPE0!*Fw}YGfQWZwBP8M)zg)dTIfKpemhETcOcfdT{bh>VgV#`S3}fh zCUz!C^RQS*`&&qo8to{VWp9hCWwlVz%h^)ZrtitAl}@NDA!_%Tsq<%nN=j7Hs5R|Y z)o7`GFZwzt>cv{2YG-sSv2sPVy86}-_0&^OJ?P;eZ&tU9M%u_$N;&n&?e5o?Ua6MR z(NA3tQL$L8z1r65T7Nsm7aj{y6P)7hhFr0bF)LN_id?gf{;ZP}t;-g8Hbt@d%iin0PSk$*k5nshHJtt%R4cnt%VB4wcNM z5QVK=UJkQ`Y&EOqn(rU=lX>HJ9YRtxL|waf%}Z!ot!%sexBqeqzWd=2rKeKu!s_7f z5l=^4NsSFbt#^?td=*<5UuPCuQjLc~)Dp9F!6T`T0%|9paFmUQ+Dda2R-)BywMt~h zj9SHVv{WuuO@z0y1=5G*<%=OIc{zD16~b|)UcY@kK@!fcv-}n)m8o^FG&`KD?RwLr zHl21g6=Ge#G!S(Z5RvqTPOU_<1tRX)>8h2sFNCOd=?t1^Z@Ep6If*Tle2I3q)j1>W z*Gl;}hp5%W8I)|6T2!=KLouJ#ZS~yVFftv~3rCw8dhNjwmExW}9ZaGVPgl?FRBRop zm8uV5eoIZAUa!M)`t0gk;b|vVZ+V^S$Ip;^Zb$6(=5f@GcE3H|&p^hgwDfagEY%{r?kK4@ zxH(wt_Kx2d!uyT52L~F*AY8BM71%T6NQu{%r`Vwuy3sHHj@bqZ?ftm@U0tuE9CCk@6i5blPWPp+Oq zrrzBRwtOU8AU=b9OHLSG+joVi1#a^~JAx&9W^US+m6alC80KzQ@hv)UlyVb@CTwe3 zUBgR}`ux{u^7U#lmo?7%<#cj-Bt#`Ht8FY2+p=I9X+%LQDd##$uIFoc4YnXOH2H-Y z2P4`vqS{e|Pht?4vH7Ag_skB5D0m?~(um1vKWo7BtCe$225+0_u)8o6q7+5xR9ivK zo9)$|p6XP{p9@h}u3ULA+$`!V(e}eB_0(Y95^R>rt0n9w$=vR@Q(OwS_LklE&B7gi zIR$42TEX-lBhf5>+eD|fTnJ_XlW*($j7}_b-7HkG{%PpHi3BkPqdqdRIm-M!APnwd=qzeL_u&<)?0zGFMS*+YD*<+J5%*k?^Ldo?I<(O{6PA}$5 z#R5=r$9AHT)EFXWWl=3x361Ku30uv6bDVqA9_RH_Z|cK)w>w0|pETR4H6H`2xX%Gq z2GZi^nbKp@2C2Hyl=htzx3|)-st&*o`2pWb^luTUia5_eX>jKdJFa2sIdK$Uk*`o zb93J5eOujE3#xrggjF`pW0PEZRx^LU`%cT=z7&G5<%UNEL64<EyEJRKP4QnlY`$|A6ImkR>6QaiOE={- zT|Fwf+Tsn}a_To`oCUMnY5qza#G3=;4c~OAY)pg7lTUhD(O3@FEoZ|lVX0Tw4%hAJ zmpG};N_J{aO+gbrEqE9F6*Uh#B%LejDiInN-t%AH_OU zN-ArskR;SAo4}=~qPI^tgUaKMK!b_OaVt43!C~cS94ofeX1itWN?Q90-gED?MNXk< z@U&(ooY@9NF` z&M>M>*~?&i>N<-=J$Jq8*#rjAHR-gf?X_K!xb5mI@^+T(EcM#dQOu802bY6`iNlt? 
z?yNK`J&V`+Juiu=c!SmwEu(v1 zix$S$+k2O=VK3-B1Y5+^t*S1iZbH;ArVjnO2$)Y{URD;)AXyi{DOX(Gn(L&TEcJF_ z$apf;$)vvq6QR&y^fg;%3r$nXJT*9Dj@&+rz`LGA4{{Uj6CemQdfV&4ZOVsBse^d9 zH?@*_6HRy&i!fVeJYfh>aPw6bn<7w|ASxqzu$|SnqeiiCrQ!Rl73}#Rngr8*HH$-^ z>L|TKfxC|B5a|Gkx>woThFPL`D{!tiN&bKAy?LA@S9Rx`nN?X;S!*v^T1!(UH6URT zrC2hyl7(nmLJ~-7p+*LTpoxr#>~5)B-RebZEgnrufNWYGe^}obZ{Q6)-UfsvG}zco zSJl`vm@zO2FpCD;_+xmCpBHRn^E|xoiOh%_*J3LhlKKJ4S3+`jVa zc3;VAUA@9R%9O2nRG#uo>afYb z)w@pPd4O_bg?tGEocb`gs3pm4zg{M*3r>`e%+u2)cNrWh;f*WK+kt&Rsi%%~Gk=^v zZQA6oMA^|j>A_=1PW#v18Xb5sWBDsTP&WoVWr^fuKJCY{Kk6*do+~&{>+&Z!2q4oUof-q_~K%qM0J%v?PE2h;aXJva5pRAcg8lflH7CmxxojW3NS zNQ)FBYOQlL|y{s;=wdpivb^g}k@p5C)Jx+i7>p89b8|DXoR|E_);mgLI6&53XTTHEQr-Tx2jxAVW} zzkRp^Ve&PvymVKb*}hw&g$w-){zcO%U8YXRTqrO z!4n;h=J)a#`w#AU019Lq@&wZbNR-kD zuKN!eHqx1SXtZ+g8{Tli*nQFAeZ8$MxcKhuzwbGEWcU61j_%!kXfH6Bq~5Z|Xouy! zJd)v~bH(@ODVf6J5tB$#9IHaJtJQ|TDgj=Z>1lU z#PkZkxW=Emzj^hpQ|TJLE4n!QR{8Jl*CqbnR@qsZdC$!3^hc&|ochYty_4UWd}8vF ziC>+#dVF#GwT;g=?jHNwvA2-@f6wT}BfmA`*8iY>$MAE*_YVET(2ozz)PA9MdG%w} z?Ug^^XYPCJgAED z1`ZR7IUzS0=mVJsC2y9|a)a1F2nW?{G<;EpUTf!_ZSLGU)8VW6~eUXz?ebi57G!8e_H{uKt*-j9GcMc+5xdEIp>} zB(9yBG*F|?)M$qCgvcNplmyXT#B^gcaMWWK`?tKBO{ct?>|fupr5ROs|5lz+kNN29 zOOHvtpL7fGj25b`1{FRwt7*54h<+;c8L=o~AHz_h+rBz+wCTlL(<+Cr8 zVm8`0Kd}r~L`#PJ8MP;5t^yS>PXLyP28qI)HAwq)Au<56OexInSt^SIxHssFy zAs=~3X-UyL_h{v48#rJrDaznR;H0TRCkIiiaa%MFIdw@b^i}$n)LF3Y6_?a%=1XdY zmlqFv*VUyJbwoJT#g>zYYcmN{;>DIuvk#e__Xcw z+It39EvJ#+G_z5b=6ggYr4m-gOqtz6X`d%w(4G*K|{D*Rj@teMdp2 z?s`G7ptBVZ2?NTh@mIVT6HWv~L)f*n^3kjU${jtq!V-^SdE5_9Iq_$`7=IQW4FjEN zwCY?Mf1r=B@^sn@R^L#lB5|LHJJKko^Zw~#I%g{`BFG%Eu7TVO`_pMU1`Pr6LL9W1 z2{;_2GuGTQ_JY|gPbYfxBF0OY@nby$S#LRgT=q94#L5u8GoED8$n;of$;pMlMUs!OIi(*^!1u(k%d0yH&^PdnGcTfLu`n=DdVK^w}`ze-IgTLEs4DY&F9*#yVy3@{q5d?~{F_rJ=+_ zM7$t15XBcy8Nao~5p3X;#I8fVe%xNo2Vm7sxsat7hU}B!e5hkJhLUi$!POYkS%_vg zF0rJ%bulkDjnZAi4n#UMe!D_*+2)!q{}xDTa^Q4=u> zgN{a=XmmKl04Fx^kdU6`h9V+fW#LvIFn>Ua|1YWh*UHRWXC|lLG5w;c-=BKzkfuHzS z{@+9I6mQMiyxo#OJfQQpy!VDjyF~HUOru3p1_RX@no^)`)JmE?Ol@}$Yy;S1=VjL( 
z2!z#~vk1oR1vi|hO)>Y#jxH`Zr#@c&!;yR*AA0A7rO|1;K`%8nWiw!t5Dar-I$T21 z=7o;gAu=Vw#z=7wKJ-pGx$~({J>p}=z-#k(`7KVK zql2~b!#`3Q{f^S;bOA%g)Uyo7X|nJr_HgjIEyDybpqG+kqB^cOdefc{Y@jY@g=fFF zH2STj(Gl!IRvFYFVg5Ow2>Zl$F%raaaAs-rhpupU7q7YLtp&&7*`+no3Zw5Sjqa94 z2Mqxf0qjDXk7ti9UODt!4QZDZ0w|zSsG3 zT&`cF<1|KZhH}=T<4nWx(?_p-w-`NrVvNyeH*Y^bgIqnL?-bw@Ib-oPh+3uZlFS0V z%oAHAs|#qqbikY8oW<$Rj5DZ6{5m(j=K5VbYVR7#&-A@d$TiG3TV))&QSkDzrW_$5 z*D#<}=zuPJ*{hY(50wsod6H$talCo@C0n7teb>$S+V|PDpD2dfb0UMUH*dd4hC*Gd z1>E2mz8&*OZQxGYzqX7Jl}#i!ITu2A?V+rBd>=Dt-UbxST~>Q@G19RUY4@Rg8Hugo z@l-^i;>I^=J4UqMwz>W2-9XC}<}O@wB-@$~_@rmuaJ`AgYR*$8IZtwKzHRO130tLc zIv&b79gXs+TESX%WRcbm@;GS}{18s~+5;mIM=atL1NTq#uJXXUN&`nt zlwoNs2ZrtPoK8$5C?Zt=S6YeKX%${Y(7P(h_HA$L8&9Y2xq@$}N67oF{lG-V0o?!{88>_2{&BBT4}9N=3@)d%Az7T{UoWB8wt>Cm{X}X*Dk1 z7ViUSfS<6$zBs(J&~fn|=@6PqDL*Of)Alsuf@ymmoy+qBu!w&4oETw2xAqP|X;Z?n zZsJFhS!;qK6a3>+bGOIzeWD8)(cW6aH0KGFKzr2rSB%tt+mCif%Fe$Q!hOgKS_XQ; z>{-qPt}`w+W-t(KMiiFaHAljKpcAfR7sp>QQ2MsR-GMI1?hbMHTV~2$k!%t@V-5TW ziNtAfo!BYRb+pz%wl`0s0j}%oZ`-lg@oT?O8t2~9IQXP~B6w61C9Dt|1^0kE3ZTku z(P)BhCe}LR&;;DKgKV6j_Woj=_S+6I4tG@cJhVskz})dS$*$u3+CDo07tD2}%OuGU z^r_d}f-kFc%&*{mv1ZMywD=Wv!8|aDi7?Li{3m zox@PwksGCv-}-QAWYVW*z=k()N(doAE)8i#07yiVHEaFbRQ2Kt!YrPdkxv6QyfDLS zx+9OMktP1$TKS#IOlSIg)Bkq5I`!z((BzvZ>l1IC7#;ty@kZlqjft@*#->L9-RR87 zef4kDUp4%D!^)+97DY+`*=*alV(bG;mf(OiTlL_}kXKgr&U5+Fzi zJWF~(5Y&*?^32LJ=jZO?C12@mJh>Z#E<1mFW)SHMx}L85zy$`%>_nAx zc%rRe>+r$*;nD1F@7tfAMWfCs88vhAjTp79XH-akT^I_SLNu8rf!^d44@scYPt4)C zBS^BFNbp5Rb|=RU96F08Z7P|x@#Gsy^Q1G+#hbR9ihZ5M)5+aeo|(vvs@}gE2k z#UtTH(C*?qppGi9Ki4ZQA@TkJgGdn(^w{mAhxBJDWk=Ha4|)rIcG*oNpTihh0n6Y( z4zW^FN0nzb<_2BzC!NMgp#+@YTR8m25RGC&0S_<>9eCmo;A5^X#|>P1Pth@`g__7w zo!QYdW<$xC%_jxtxT0r_&iB=lX>V@5MQh|{v1rpLUx!(j^vohVNcON55=?Lo;0cj0 z!2L1eGyp9P6DK0gxBt-IXm|VI{zPY@@Kfopz)M2oUVf2YcH{?1phaZt>Dq?qu@hCE znaM4>0>!cQaUs-3(WGaBWiAyrwm$PSA zky^p7#>v-|c2h*}CVJU8v;)fYu~Ul_u%jlM)-6EPhe}WXyO))QryD!Hmm~|v)3PtQeIX-3XiNPjco#8UnraO1 
zgbR4-L5K$TO2gk&divj;EDcY4_&9~|0g?+mf?zgzF!I`92c|=49oByA*FZ;p)?mIbqMK3>*@UkA`0MOR&s^m_mqbJ zPZD0_FJ2;DjsrI`+O)VPSIIK8B3Di)2Lbw&me*dW^YoIynQ!9m+bga8BcWuTd27^pyGfzI>QT}bKVI(+gvGqb8%_-k4pAbJv}Wp=vZuk z_2DU`o*;kN&`M3hM;bz-_N_C#v*5ZUP;62@D@^_eOT$0CwKP0>{#Kh9mC#W_;S3{7 zo1~9u?cagjvV8y+Hs4b|3N;n=C(TN?hiC5FwPo(q-elFX48Nen9(K8#&1Dw>_a ze6$G~S4ES{FCIw@CLc}taHXyHSb6yE<>5nzj>B-4!$_#Dbgc&x;Jv4HCE$njviB05 zrzeZy%=73FGFF)U50p;5j|llGe|nQHO7z%?AYXFQllEopX*Wj@3W1O|3!t+)Pj6>f zx@mg!(OF^m9i^v#QbLw|c-n@dD(~ZDwo~4FdQd@YmQuwz;D zOkQF5@$&HH$q#UF@zuad;7^0c2rQfd41gz42RIYy=Dq%f%kc8z@$BA8PydH%_($F; zeu$GHcyOqF*q}|<26uvZ0vgVHPwx!_EXs@NOu#i9^y+jD9)6(k*Gib0I7uO`Ci)ES)lm?IW+RkE6o4r)C5TUe^KSfDl>Oae{uR{Q@=HJ{^W^?Z%@SIe>A?m z@u9|6)c*f*bkE3VM_yL{_4e+Nux0(y9WDZSO2j+rPP{Td-*4*g{DyiPeny@&{h9_&n>?6HS4n*s=t z(mL9(=mW{-6izm)!Icc?KwC;&LUX-{u+JWkRSmh#418`*I-ZyGA;e_*2qkzIPK+~N zR{`O7I%6k=QY-Z&huPlOya z7s2K8h;kqq!zWDF3I+$aMa6>OCSX2_EfNdL6zR;c+DUR5tR4&Y*`>9YicpZfG<{N1 z?1Gz#U4+P1K${~YrNV`anV0TC{1)xr@(A@GPh~yI9ExQKFQAknF(stm?S(be0|7LWd48# zIjJnmrEhyO5L6v|v{ZwrW#FX}GT=uMuTgE{u*FisW*Syn!wRYX7T2w0S@Jmo10tGc zUz?B#32`AXhc49=A%x9@>m-7@JTOwNWs?G`zr_o80Gsj`E(~Hi#ql}-l1XlkR|MX_ zZ_pizCSR0&n0{+Opjyi!g;UEc?Jv!e&ey*`J4rbuvXkz{Ch}w5jTN%XrVQ3aEtMd1)vq?@r=N#_M5RH-ybecxNlDH+@r z<&=CKr$lH{xl^L^C~7CI?UeM{rL~uoQ*yjBb5fXhc{0!UfTGC)aq&yalfxP|29*Tx zAc@tpPVs{}B?&|wW}f5}Fia2oqNyo$#MERZW`qM9SDsN`$=BFT!tBa3L$u8Uz2p9& zNgaNY2LqNkE*3ut1R39#n@>%ZwpXHOmU2tJ#%u!ZE4d*^U3-Qkl7D`cI}` zG4<5c3nqVZ;$J2Xj(>Ul#>P{POUB+gRvA4o@+TwL)t|0EfB5Y~|9fa(?X$I)R)4d) zrSg`u6#sW-7SEHJ+1#BO@|#>a+#I0nY(XjT%&LqiJ&JHHVXv93LLfuD$SwfGl~qpV z_c{MMtnlh17v>IIV{d2EqEy!#J(Fxs#28GNG#;P?$c_}$g3hy%nz2ubL1Kl4Nxk>~ zY*}?d$*So^DPDX*&nf^Wd~HJhFrqq?VtAny@#@7y1$1vV3EiT?D!c#{niP{8m3BUnvM1NNF8dCbk7bfX1ot9O_(qYxcMJbZFqPKLgnjCV})TfXh z#ww2Q&*vr|dtP~r}=twsA& z#hcx&wa4{lX=|TJQZ1O9#M?!MnG_ko>z5lcCQgzHpB4n@HgQSpwK6%}Tw~{K%~DYS z$HI*-_86eqUE(Dm)|`Wei>3;8QH&oX2`stD^5_nmLOVHy$GFyEsW(9d1rcjDE=uY& z-`mg|DI1qVfY_0}4Hyun;Q4U?$Hk|{I`#(N+d7Q#+MCqg{xkNrlt1RH*2D|(l0ckf 
zDSx!^asvxbnL1Ci*^1i%{HFo;ww_69Z;MIeC6lNWYVnSF26e6MrWkOEz@PuLnigpo zN0S$12g+u1z`dg#)$^shDQcVmDFj465YOq#eQ!nW=v6>{@$U&ltCWKaqA<(sn-fUn>sl|(^ zPz&vZT&k^@$a?3t-YluTEhY_@Olsk#@se|Qn0S;;bc!UmNr5?Q3Y`z4j~5KMxAja? zd;27NdkB;Az3rnR1ebyfH=u5rbPrKU0y&)Y0GU>)n&Y(WqtvgpxAm-1d;4T(dQs|| zbHvSrDo4x>bQ=rrShCP~Ncg-=8+rVQ8i8nih`Z14ZX+v1Wi()d>_@WcWKNiL%5X{R zBE|rdB&e%k@_YDhQgf9fIAK5}X@MqDV!)XMjUHSQ6{fbGAiL6p9+7GWlY?8OV#)7y zrWS=6o+CJE6CYQop-=)PO)PF3v_m3-Cej^2NZqBqZY(*tK`NB|9>C-y7?i`eI$DMCcPD}cyF#*BS!ugk{nvqvW-TvrhH zcbOxpDaq%E45dwC6*!p}oy`r$A)rcBG!RVczVKJrr{yzpm0Zc-HYtevyMPix^v_>7 z2Z|S!GX%OZE-l3kfTb}N4l#L4*F<6r;jQ%{?!hfm5chYx@&CON|9@fnx~V6pE}Hyz z6aO%gjDL#!|9@;;Joe+G-yV%e{`<()^G=d1GgL_xi){4*z^+1tEg z6)cRPh1w7&AtigH);P8xb5O15wW-8Fv%_`PL)H711%53X41tOulEiN$1ugjMMpm8c zQpw0kSb2P7VbP`Y+!R-0QJ4KaTeF+(RtqW^hoI2*xyvXC1(t)6AleYI%Z^zKS3d)5 zHk7S7XK@~Dx**?dP3Ljcs;z~r_t~YlZc4<3b(>i-K0@U^>rv^84W#N3B8Oq$n-wQp5IyIE|yvpM zE=@{9L4a^;$ksZfZrqf>k2=-BY&J)?6eeBz!p`Q!%Q30z7R}ZW_5_fj5{Vo;fnxwW z97he4pzLy(v|vuF(zG=*|9w-XW!AZ6v!)k?vEB8HFpDA{30!jHkU2huqa|+-AjMAs`Gudt5(O@`=&`pEoyYKFlrj_QFOw(j?rul zeq%tw4WE%ah7g)5m>mC6M5uO(3&GjW;>d0bL_4la$~Z^R!{{QZ-Lio$r5t zHk0yHWHVh-+DvZh!xt9iLJ>1$>hl( z^H62x;XMObL%k12OMc31r?EJPLEZd6=eUnJn2kZmp|EZ(DKC_A%orva;1LXF-_-U~ zw2UdaDYH12vl!DmC#yQ&S1YFNrf3}i%1fEXxqKd0<;Nua-T>`uPJf_LrYMmqhQSHE z9>WKtgk};AP@mCY(#i;lXxCkrH_H&>8qBT6UFOl#AF*7ne3u;($8D@FUU7 zlB1HDLg@gOEMf{4AvivYzKfz|Ovyu$NxBGQx*UMns?PV-W*Wdl(b`VRL6PkwT4Q+< zkmDVxWWY!y44I^a$N+sYpCbUWG|pTKuw3M~v_NDX?X>D27D>Ryhw;KMz@i+Eq$xe% z6?X}*JjqfMFbPcpnCy1w0VWHmy(n^;`O-O*(4iY8ngYD7ysR*9q~ynU+X<>h z6r!XmHL109dUp)F{))wTC1&NvGTL~;oI~B6`a)tyqJQ*i zARVDjOQIbae|V7V9m{^cz5YozJU|x6|!kF zZr^OJ4G01)1q?lsY8Ye#xJ#w#04XYh$uK78r+F+7Zdd78zJX);M(ir?_%sG3hLjbe zv?mTH_$+c!l*f4%_XTy4ZgZ{3ad2Zw$MOxt*HR?gJCa1E7HJZ+6yO~Qxn7Vh_7luV1Sz3G!K)>A-CjR#>@$hlJ1YN#F`S(jAgdsU9`GR* zYc4f=6sh_nrMU50-n^G%QvT-oZORi67&1tJU=pfuWJ5(KGN8h)$H}5*k%HKS3QV=wStxSL@lvC=RL|w=vGaE>P4~J&mxN&fcN^#@0ghbb3Q7>++ z=}ffRRIA60eMafXx{Ro}ap+-u{Y+4+cO0N;MTH31C`CfNb`nCu^X4MsmW7%`Rb=X~ 
z7dZAA6geIU7M=srFv3D-@*0?^4duWVgA}tHcr?GwgMMJ4kz+`)JI5doP}>> zQx=hW>j#d5n^X!MWu9JInkSuk{{HMH6*y+Q=^Bj6VF;)+PLdT?+Y~Xu%L0ag_iZ3o zDWq{?y*OGga_pNW_@Z2Q#4% z!x@Nl5INc)t)alJJ{3U(qeu_`!lK+qLHdy%PdN8z?h<`SuOsjRn;g53unlGWbf`EJA4SWq zl8+Lyn--8vE=d-Sqaq1^z-ujrRJ5 zZD(Lo$xU&IYDIBbE?_Z9=b6-0X}T$T2@~a~WXaPeR>{pGR>2-Y91o8Gfm7T$q(iCQ z#=$@!(DxlkDxsUM=cn|+TRJ)7%Ht(R#bTaB%_N_vFd^^eNG8|LdIXSog`h}>B9#3| z%#!Iv)w>c;;+TA4XA{ioA6PyHwO|L>ak-o%0NXUBImKGK*Qdt&rEqb=(Huc`l9{rut1 z(BBTdvG!kUUiE#|jg=qOi~m>s6cZwaNL}2BOdBs&)+WZb$w6>tO9vI)Q{*ItB`wNy z?ez&-{jZpY>~3vWy~sn^^b4eflAkvI}atkcB6EAd8G=hyaGr6UPl zyQ;LoKh6dhS)6>~(CSPOhXJUSTo^)d6iXuQ4~C3x193By5REE?Gq9<&!NsOGmTZDI zg0M3yMMEZzf|wmeYYpOFZdhq}#D&*?sja`k_2#Lx!NsOGlx#v?9i2BWe3S*nCgK9R zsL|ety+k>>Na02UZg4%DN*ny+Y;Y0s%r`i7^|T=2rOVt~6d0vR1BHN=a5a4w9}bS| zS{&OmShmsz|2P|5WIXc?P9a81XbDu30equHEEt>|) z+|maBC<`Zgp7{nxREp{&idZ1CJ>kX$B9j26+JOm|llxX^@bnP>+skFhxCK2<@ zZ350j!5x4L=ZsqgE-=+?-~;rpqR=^r&~`BgJi+zmskFhxrq`Bi0+56@MNc_)5Rg9l z9nkPZJ(qk2gjwH-oIy`;J)24!{G)7ek@CzpIF~EH2{AVc?ChunXcy9%Cnb1vYXbnI z{f7Pq*E6TI!9PlzBqCZl07rX4vbNwrNv2u}4*)b5w+U<+Hf0Yf*R)I0pvXTAxQg z_(YWg%3UCpyRaxnbakFcO_XLpsg+1k@g#|qsm0qcDn}~WI4l&IIIa`6fYfjE&f%~m zYy=PXJq)Gw;>o^Qf=v_?<)WF9K$;`wm(?&P(*6{GUkPFnjR&xqJKF9RpI zX!EC7=D8H$P$fwh@nrpIa&VJM!KBR7&eA;T%=7nWHWnn+a!$ejRnz>ctloVc5n0zO!LTs)};QB5c!no28b# zI9;hrVGn9ZC;X^qAo#Ywq_DVfVP8DXKF`Try#^2 znX8tK?gkJLBB-2!Sr?Senpu>}qI~seS_CNqDLAUH$rz{7pp@pake{hv2TMm* zDlVoQ0&|W<~vr4Pi;7*YG zqI~seIz%pw0)AdIkPkWar2ncx{UuaUzfCPmvzRKa)zi@iuHLIw@1mb&^)g}4teO{& zIJ8tcamZexlMq^GKp>Jhrr{WU%*i@&0v9Ift)8AuYW1*b^P&iQ=BsBzo%A6|!ob9{ zlwxpzSQI${+i;{2{Dq=b*rdIBdSpM_(f2G?&kr1Y)y&P1`NFxUkDh~HEmR~r z4tnAG?6AJ$Ku21+`j}e13;&wc6PeHa-9vyD61Ehm(g~ppS%(N{P_Txu51B%)DzBI+ zz17pRNv$3>ZC(_S&wTX|Nk_j2rZ{ETgk*3+xy$LRLE{!5HH$Qk)n z856!n^bw?d}u1T}Ie8oJzcV;1E(3O|cC<;}WFN$(gDR zrMG%|HmTLarp=2Y_L;99DO$9KAs`?$BG9@Nw+WTA1=3Dll-iTfEv89(_4LeAtA|-L ziz4}%uO8G$PHs^_utkGRd^_@eB1z^!qJj?8O4L+ot)7lBIh5$ak-Fd$tez--=BtNJ z6m262%Anfw?n%8bcsp)_0N$`XDVvkx#59p|@ 
zo`@OKkqTs#)@o30=zHVoc`avPR%!JC#pR9PfQ0zz5P8pN6i z86X6j#im+q_4-WG>CRhyp<2Dh46BzZfA+kVW&(!h!;@g0n5hW=j+#K(RUkF&v=l_3 zFiCIq^lVbAhfSLoMfo#dJr8oJBp*31Dbi{38j73+m4KY_9GkHBT6z(~8BCR0J<54_te_162AQ`3n)^c`Rhl! zHed9WVF%t1^Xq5ERfRF2_ezSV`ww_HTd|PZ1A<*0=8YRPzBW=U@xSjBU z3I6axBQujufrCdnpv+2GKY^oXQwiUGwj2NNuFUM4{@l9p|KAO_iT{7N_RF;`)yFG; zTiLx*{NMLCzyK*U%0Vg+j*wQ-Ah!@PSq9!HRzbQCBMSu_;D8mb4^s7cMbd;9a8R1v zOUZX85rSkOEDDGwFuve9Hlq?q^)sD~OHyN$L!LU{*8!>ej6y|;)TF(;`l1}p*LfJVWNVQ#`^?f>G)2y2(WaNA z#wdsLL9&=7*mK~~1R_AhEE+zx#oH63W);@`Nx> zVj~AcVTzy9{fl%>VUzai>6xWg53^>LMB_7Gz1XJPHh2iJkhJDWCEX}6B|$cA?a2~& z_JHr6PCr4Eb&Ck{c~7uN$w+N-B$R%!M2v3er-nXjI7LLn+ZZ7TsF!I#HZ6@@U0bJ33XF}hSf zuC;nP-7Kqn)aq@0lGPK{&-~p>ZK{+LK$bDMENscJ-{~fE9FL^Foqq4gi`6ukpy@TEO|2@+G?@y+;Pkm(S;>ov7{M|%*{Bz?kZTv=K%h==a|C5nF z9$BdWR{etEA0PVrp}T5ds@+)qX!Qk^_w>d3XYoHe6H8L2B;jyFdV*1jggzF%cu@=m z!r|6yk)=n>0bm|4E1~_CVgSAN)kxk-9cy1Z0B>H{r>2=v2-u?$BHnbhkXA={9nc+J z+FXLj5Ng>er;J}nNnZNfY;mbj639TzfjNqREy+|r66P((B*8Rk75WkZk+SKzb<6pmQ7X zeh>ylqohke+!>;--IKLbbq5}!OPS!-kAYnH}G-Ib{n|B(Na+a(rdL&`h=u)l5{LW?TxX&N zTHxPSYQe=t&ij|TEEg< zO^{Gn)>*GpVhsee{FK13QKI~QfGmNCfaX~O#0%&_Ou0SI0c!^4&5H(NDW!=>TT*UA zleBWK=Byc2qWlU-Au^zOHidLHz2c1K(VSpisf#{?S{^;QaUS5+iReKKm5Ua)3xZaN z>u{==ApVU~HszPVzeNHx$G@ffFG*6s0K9(;0@)=?#kC^-JL($_?flltrVMUODVy?3 zL?R;pnP*dU9!4$MTG^C7v$PgXWm8zR=_L^W&9ftM0TT;O?e;L6w&|8vnj~4@iEcd z7C;FC9L^d=%4GID1ehS$Q~7q93vkP>^@9-|yGq%V_pzHq12o@FZSE1^$R;&OLT5(J z14;m44FzU)Ox6!g9)B&Vi@_(Uludab*%Z0YzwNl9gF)<={*i5v4aGBNOVK&=ow{W5-4p+t`u{JC z-_-b>#*4=OkFlZABP0KFWM}=E`enmU4E^6jQSFm8r}~T4nac4q5BWKSvcjX;tc`7z z1Gq^X1SP*M32)wRgl#w$9cH0T2U{CeIkj!LH?F_#mK{5&d+Vaam6`8X9-td z;#diXiTuGHpyyI+kI;5@d7M>yRP^}Ayzx?abVYCJbRI`7U3P!yTBfylMF{y4UN9*? 
zx}-NzA>p*^!+t0C224ZJlr#meJ=hKRGsRwLGpZXz*CvxfPoNh3W9l{V-a&w1f$^(Q zje~?d8oa~HRUG-r&iPAHeq`jUhlGS@;;B*mk0Y5y{P<}sCeS-jV2ZBdsh7xEHBAMR zpJZcYg-AWqh&w12kcw96Mwqb$0ucpv(JB;(n=1DxyGPEdJu0GRlzewrOpxBCLD~&WUJ+c0KQ{wKmiXRvs=sQa6&KfSQwnO4>fxNJ<$B{$*J=NhA6W zI1SLuAoJ->&1ajAO=7U5!`N9SZ;!7 z;y>XyiAV``jtFr?YO=bjNoUKdqEix9O)ZI1XxE7YpCE~ZZZfB46AmscN={#1I?7}a zorjT^(mJ&pp$P1*g9d0#Q_(32gJzaQEwp>-0(EK3vYIAMSdvmuJC+(k6w-+=5YR%OhrBNLOP^nIwro=H_9E|_D1_$kTS6fKJta_-q2CG_91xtAw0S|42P6Qs z$I$92geJ!h96F-2k+fz>;oFOBB&86#6*chnC;9uckqT!;Hj>f@-C9GoAC(O-NogsA z55>a4s=_-!w2X>ph&xd72LZT|5RnT2r%VKpNTW+#6pFvd(?=;1O;FRqY2~PVqBFK6 z@}S+g1ba+MX+01(E*0V}QxI<2NkHw0AudSj4PQkU;nuQ6IVqpO7Z6p@-tkz~`MO#z zZHQaz%`Y63Eu4;`3tF6xkT_F3TXb)w`ycrN&JEJI=;3-vYQfEM9p#|tY-$?N(r=N} zIlaz7grFI52yybJ35P8ZR-4-PY2}>!E?&53f_5X^tu=~B1XG~`KzdV2YK=S_jnW*^ zM-QkmNs{)d&dFIbOF1UL%SKYlpd_~_ma^&j$vZC_8xgnOErJ3U;tw{>L9K3pt8o%( zO_6d+WQs%>v>V@Ut?GPTZKOQD)v!xzD-{ljY$Xu}l><@|0Um;_ZKU-%rKKk^n)`r) zqy$pfy#SYROk#gPhlJW5l-P-QV1eL1i`FPQhw%FS06-T0A#yd!A9;osPIN(gIVB8g zaoSKW+@gyeK@L$JsX7V=>25(m9AZ;JtmaAEAJH*Nxg*bTLW(G8o>a0?*P#4|mja0? zTqC(gq-(OD#3+lPSzX)hw}ym&i=c+ZO?O;CAvD3dWm zq1l+F_i(LKd2p-LvHTZYUZVS%`$24|m}(LNCeSqAL3lY)ig-KC6GSNjVeqi5th@4M zI=DgVT>c9}AQAn{Lm-`5R-0<|bGgqZmM6kJBRxkNfy+m9D_p4|3`ixIV7>%_JBx$4 z@-ALHQT{B##F&=HAWgh!+^YCy++uijG#y4>B|_8M_nPa4iT(EAe8O6SCgt+~pn@WN z7^o;{Z9x)8^^wf>P8BBpDRY!*esXtJ@i;gcTaGEcn6o2J=vbsxs0V=L>bvx~v%G%! z!Brf^!EI7u;-B)uiRNei!nH{7qnrYoMoj>32TWU%>QL}wylnVg2M zT|gpy)XGlO`;7bhPC8*ZKrsD@axkE0h7RrfNX8aqjAij-sWC9=h$^Vtg&YdT+7 zJ8A7ev2TvF_L6c+-o*|VrO$0Wl;80n8Nd!9JxnW^CjHMi@u5Y)iZHqVn-p}G+Y;&G z>3}`7w&HgX$>$89)hixvc1Ll((A}D3o_o1+Og>M5AX=Y!fB@;aO*a{6^(gA{b*QV5 z^T4hsEaPHBtGRiqLgQJrOgSf?CqNLz&pbe|K=a|(Q0gkGWynkMV6?FV6Ximo$&JfJ zu42ZsYLs$NKF|9nlAo9LQX@LEtu_}9imqK+X_3N7k#wm@f6n&y7Hwn$x+Ej4-=du? 
zE`=17Fk!roP?AVv;QtgpimpjIso~3&pYmDWL(%^1O%*M{@wuRGp+X>~*e0cv1fuu^ zaEItIkKjDT8)Z2AtlFh~6@;#rMEo;<4^#SUBW4Y_#x2RdfDpk!cD$12F7N_&JvCg^6Qez~i zvlyfEef0tja5|G326#i~B?fuG7XtaFHO`C@TASKrf_S(Zv8abd+U zk8)ZRbSO^o#VePxI#O2fSLuLkQvq&U&1k@w(R28kWEN2nV6LQD4<46j6NC_#6WA}d zRe82Jo3{;jHZMz|k@!Sn5+%sMG~_uGd_~?MG)C#%!Zn&#l9ufGpjh5 zF>88R3XQ}m%Jvh4TX>sHvP1eE{EpDY<_0mdD}yE^NKLAPStt6?v#2{oG$Z}>_B9-&{%0kPUYF+NUpx4Gqo&*M&j+uG?6++ zaGA#7$i%RE(&q=?4j?w9%RDW%k+<`TGg)VrVubMHM9xx#Eb#qsA}Of^2PF#E!C?mP8LU-%XAS?;+(pqNfvQiS%iSi2e}Uvk4|GI@<$Kt&?Nyn!WpN$mhFw6mJ_$acB=2A=owS^D3|^d#5&Rc%zYG{@2lN3fRCcL zpOlaC4qR7J0nJ@k{CE_S5dg7l;nPzG#R*Ou*%;aiBAlpeC(V+N(sNzcMcWe-RVa%S zg|5eP2J=)IjqX zlCIqmRqe9F1v8PxDEtN0OZ+d|{?V2UJ?s@?$aUvR%dWy}!LCh9A_92 zPoAC(HjhPdf+2M+G$}#fF(}ib&gyIxj!QOMA_|(%md-}~1(G&+(d_3vPk)FS030VkLt6a``w3dTv`%gI66COT8}kE9txqw)UAM#YnkH9F822kO+ge0PnY_Z$11Ec|Xo2WHgU5B*KDB4bWWVVE^FL@7o3F9BqiU1NC z?K=@2hR{;`g}K70Lu@1|LdsVS<^~ld-2Vu>6F?`x$|21@iKd|aLOY=usXyS})-y@% zZ80e;L&~IWx7HGyp1L8L3Xv;f7VyL=i6Z=!+EP>%Y$AtJ!#&Q&^*oxwIzd;1U&ipYZ&dmD$*BLXu>*WxMD*q#ZZG!|J; zaCV4n6j2Slw_8FFm2;P3W?mSYF-XT==D!(65rwxgIh@-2)!shHs)l2<|ZKzK+g*cWQj$R$QRfjPYqYHC5Q(I|aCaUWJrzc((qzt!Ite+cH6-4Voiy z5e@B8ss*4VnqwQG)q&`jaHJu=A>Iy0z%2!rtDpjX6NDZXZH92IXbZtM2u$ixvT!UQ z#{zGS-XnhHi$!F)Z8HG4D1_#)jwr_4ZKyY-Y9!ep3aXOBpa-Firkl`{PU}d%GKg{p zW))#2W=$`PK4<~!a2Y`{BZGur#7spT0Fp*(v+0o{qS=F43M19-kfRc>L)UxP<=8$Cylt{})E@>DJLJG=A0n}>IYnuv6CyU;S!e5Pb zBag#ji;RRz@`>$W!#HhIR?hVVc7;$(k_MG87SZIk8D>e8L33c2ttZ`w;c!r@$@>P7 z3ZO3S8xfbMZpa5*={qP_nWaAHQqQO&l*Fi+Wzh!BUppyqMZS@D1eqkxe91crLzhc{CL>e@oC!O!>?54hX(%Vf%5Si^J5eCh76KpUmaDl25UOlc9 zX>%_sQoegPI}mb1E}UJTME9FuPee3X$J+TLcC-=iBv@Xkn5m>FwL@KyZXTk@?fp;c6N=F9lfAu74Z)dEw&_Wj(FH) z&UdhCOKsrv7UkoYw3}K&8>IoVV*bb~Zhwf>GMR#yW zOYu?eJR&mFA_!f)(^axr&Wb4&%mS$OD1hvmcpa1jwR`3 z`r44H6Y&uk+qCILU`qDp>9o!&&LrMrWrlj@^l>3~K6LXp!%8(E+} zQP+Z74tLJ;5bATvy2M(3e~6+A^k!;&*-}hB@c zQNDCm3c+4_YtsutoHeQSBiNRvpBBjhh}4eS;(AM`XOddFTY2lG`Y2zzrtmsa3bQEp z1rJ43OU@v2IWPoGlKjws^_n;JZ74mf)Y9F`(uvk*zI04vNPAw&N*oa-rgR;M3)C*+ 
zzatP)(~Fco-7gFySO)3Zt~oy*dR=4ZZi5H)T56{uK}V4;PSWZ-!J2q8(q*$8iyI`L^O zU4)i`31lWr1GyN;xxnz^h~X_l0*W*;NB9o4bT4M*%=Q0MoF=MQA7M)Qn;n z$s~>e>c!YkEhR>*w{&_Ysik`{ODDRY`O=y6*`g#82dz!n6wFpZl0ZqhRCwv^)JKhf z@TJqUN-f=sSvrvb&6kd&sf}0^SFR~2pm4d7wQ?z{qm}~+0;=THF|M_A$Qs(v_)*#k zi6y{aDK#RyoBeExVxCX`W%}LaZ{676v2-E;nlByBE4VQ{U~)7-r7hu{h-N1sGLko( zrCiU&Jn1c+o=L^hVbZ4MOaruc5Rm-`4k3Pon!+S;(ovv?8jzrs!lJ?}P(9qV-#R_3 zilxJpW%f;2@*Ldga$K5E=6|OYt?>2?d>XiJJSNq{Q;o*|3DbeF^{%~d!EJZHcpj-E+s zZ&%sdq63-(-J)Luwj$hY$T;++=TK^czCkyk#W}1)vLE_;ThAJ`x2x=J5dqEjHZq}! zDWo|HCD}$WbGhnZCm=}7J4o3ZXS!N5&AqLc=`Qwm{X5aU9@pOE(Lj%Z(crWfX-R33vKb%}8&-^cQTqoc9OS zA@NVB9ESLSkSRFr5$QA%vUucMkb5A}poC*99|by=sw4sX#3<=vLoR~D5Rq-kjIcx` zl<;clpbYh#q9&7=U!dphKM|kIC^ac}x=RL5vy}EIV3-3Aq#!nPR(Y zScGsWt}A4Bz(V$>DgEJ5r{ztx5T-(o-lE^M5{})r3$Ra0kh;~9vo#)}S~rUPFs~g+l-}IQpFm$vCFM5q%R74$UCY|@5h(f{nt zQ&s2tYDZ~aDK^3sbPOTemlZ?#FbPq%p~GeXnuNX{)ojtGiNdHCa-)d+XMUr!Od6$< z>4DBFjU6e#B8$vX0J+trWC2AreD6VS6iRPmpRU+KW#vVNtkFe+NKJ#_k)%TCW7EdM zo^3aA24te2y>)ZocuZE2e*+Qcq3i)?L(#wj!kYt0N ze;o40=`9;pI3~4>nJOD|&ay~-_AZpF&iB=38sI|F+D>DIVK3}#SQfp{7xe-o=vASl z9ns@N;fcK?ooxa-Q}F))F5wDV6yWbUwm&+$@8CVF_973L^dqy& zHmx3voPkxBmaLjt7S+!@sx^J^KZhzI^5I;RqT~W`OA>ILvmrgMfZqEqx)njBGcf6r zl1bCcBKw)+q&}&nwlr(;>9GeYN%DVgilQ!N*J(nU{ZmTV$!B%5&q0~QzDvD9>*kn7CGKE6!LiT-E4 zbUu3q*dLHowh+bC#JquP5%YrSq6bMKfa-m_D#U#TCaI-+yTt!nYHybK{|nRCPyNQ! 
z?BrYN|M&XwKN`QP@ym^kV-JmfWAwI>kBvOP{u9IhF#MCl&mZ~^LmTO}Q5l_Wt*;;zis`SSrO#d0489S>09pf5VH|cIf`)teENi?LP5VrLb#_IHa&E z4*;b-B&frj|K_UrP7sq_*0fOdgREy zd-f#DA{*Kr>Vox#$|>M5;H`%cEoak3ImBB_g2R3k!gTD*ssW=U_YB!DvQtHNeZF zeYNeC@*-Z8v)9b^9`l*5L(KhM}!d6r&m9AK{3sjd=9fR z8PLpAB&H@9MAtEd8Yr1PT0@Zi#!Aser15|QR?{qQEN!zWG?UiBI4&;PRqAqt)+8PB zOs+#}D^kdx)kDgx+5e1LA_1Du7Gh)ETDU(Epe1IJ3#B3hXA@A8>} ziQcM245D)p(_M=jo7pnkgs};#5MEOKfHLdCPhgfPf#zmO#Xkz9)c*pgOC$@r?*nd; zuQa9AHsGjo&GgLzj;G})X2FpcrUkp5=3g>-RD_ZWgX2xx8=`jtS0 zAkVBft~G7wgqqnDp^4O%*6nn57<>^)r=Xk*Cjzk;ewEfSuw+nCKw8;Ls9=Zo7FB;k z%~nGwk)j0J<+0AzJm9@l3L>DhFx{baW%i$Yf*km4?Yyf5&y=mC{aJZQ{ zYVgZxwfs~o8vU;D4(eH@8X{rUhGmfg?ee1O+iv#ykkUGgcw4fwsCJUZ!agC8z(Xm^ zG@U)0RFl1IrbG%fpDCS(D$f4hn!r{oAshuaXEVvM5Rz|&8wF*KX5L{;7->M^OzyKqkB>&USu)^Rp#wji9>+umP!U@k-2S4KxCU2)UHl<7QSi4{H}u zQ5FG(!4`Z;n9b~{QnsW;zE9*p#gUL5Ws*Xtq9y}c1-V69fbDHUD@`byh{@Clw> z=~+`sm$U$FL<+Rajh?OP{D4|DZE#C(-BiAWb(>ihInXXSnptuv*oGe`d44(0sY#@s zm9l77oRokmSM^}Rov{CzRfvQaKa$ z)YP)*gXS=IWG4YF?Li@^i2247^w@8HMITKz7`cEQ509qnanQc-=x%$-bH-^77{CmScJN)Cr$A^>QR}Eh~>Io(| z``=XOtJl;k=hkYWHSbp5-*^vy{r&Dg{eScSo$fq$UA=OC4MjKPe<~knJUi2Q?%I0g zf?CZop=~rPzmolFNA@S1`lP_A{Bq-2zW1{3_aGVim0!v}w7vU1GU>Kc`NhVwTRP88 z*DF`lYPUxB%pE;^aL@jO$pc5zBb8rhe0-|&+)O@`4m0`jEuF94x5|e<%(UO1f4Iqq zKYUr|t1qutZmiXI;R5W94j(-50Kw;6oF0bzb7WtX&h4U-B4(iX^4;sR?^@=(4D?4K#y;_4H@4S5I!I=|JZ>v`>sxmz!{+!_2 z>?4Q=5^V3W_#ZW%=5BOJg@r+vkAWW zylkMY*+3%OI3ETVXCJz_`=Mqy@7P0gxn>4L7! 
zNIuSo&bdPmPMvsq3+u;&nx<)O9eS{F;_372l^LG%rP&wG>wbaEp=a4Uh92B-;^}ki zm95np?a?9VSk{HvkDQbJNOm7xpM7X^_MxK(5A9FyOZRUZdhmHCp59cigw@*3(V;`2 z19NRwYmNjGYjsbYzHsQlEhnDdSg#n>8t%YdX`E$DTF3V7PhK$e;P{EBH`FT|t2M{I zdFa8p6Hl{guCCT3*xG$0I8xz zmkd34{)wk2*#!Bg0^_=Sj-&@#`?qI9j@K)jt5w&sH;j&*c)B6;QnjW>S^F`0yegGH zG!}83;Zde-RP3wT-lkEEt>@ny8fE7U%j{H}=6v9cj&3~h^iaKW3Ewt}%pk8fo15h) zYxz&oLfUdS%Acy)pN_=nmL1%mG)5;*JYA99QoZH&n{JxnKc9Ly`_H}m_UwJd=#~>t zJyfq;S*_l9%dTs8%xyoCJ;?)eH(q;_eDuNYH+Q`3##fGBeB!AG>J`AJ8?GlEgJIGB zqw|NNqw{TR_kH$+{QL3M|9xci+!IgnB3{v`W^j`QZspL8XWkl}KJgT9??uc5AQVzY zbspoX!)#X05m+-#tMVU)j^A|Rse4B%oz1M@(W8fsEL?fzVU{qNXAx!Y=MNsC9mH#izHo}$dWBd2-F8*_V@6iMMZy!3&5QnmL?JeG`hmLPJ@zg>7RIBxV zv+}D$$0tubbx-!LS@X7i&CqddJ&?Ud%)mTTp8e4N?1#|xvgdh`hK^62c-WsB>T+1>@&nCp83PsFYnEM8FfxFc479BJ>8!nry9H@Gp3yxW6zVe z&pwc5A0S-!?5nfiNV4CcPHo<421CaiC!UJ4SJS#GGozK6fw-ML-!y+H`^l*LlYjBfI?_J=oi$0R}J&1WOLAseB2W%l(udlphSoQ?SU?uY~t zz3??UYq?8jc{x;8}>EXur98)YiD2jk?cz- z6?hz;{)@7ozCHVCbm@3Utn6oA-Tj$)JDANru`Bz8oY9VbLH3E;vQKbEhcC}8epPRd zXr5=A*JdBNHT#G^ADHK7KeDqo%Pz%3FU$UNOLt%otkAtK`_9e9ch1W`aZ~pb)XO!4 z=Vb%EG8>3>a^`*K;_O4O=zd7fXlG0Ip&PRgVQ&+9&-1fC-H`n$JEJY@qU=vxQ*y6l z^Vd8#n=dXo&S-XyHYQZytwme-uv6DwO4Gv{&hd{T9i|= z|5@|a0vNp;GMHsElt^T|$Ln9;)cG{0!X*r5!qlSR*KzN!e|^65XPz*a>{{lb4#_RqFp(dO*`~P9@&BLXtuDt&`HP6Kbfy>OT%RJm$^8j2_MsYv^ zXEBP3LS#?`M^sEV=_GZMRuPhp8HvV@kVK8S1!mGj63o!i%$mUn37XI`5ky;)PTtR2 z`&8ArivGHvC(rx5zbEJ)-1Duq*WPCjYubCSb6wzPQ*V2UVLiuTGSAI~s0*KF5;hz& zBHu3p==#7f^84}o6=z90BX;K9Hr0=EZl3tSzzIB-T_ zruS*@cf6nXzTbO;cZK&%?}^@mx6|9t`fuy^*3Yax))%Z#S$722d%f15tY@t!t*==h z*Fb>pv;Y5$2OMkl|921I*kKumca?KRbvQANu+0j5@S%%66ZMs` zh{Tvf24trx4MvgBWUKiuB)8IF13@F-&&tnGhaU?F&iE5$zMiWjRmUdeJ6$t1kWkUa z7=J<>`?3${(*(AU}nxrgID!d;eD>qf2 zF=lwqcjYtVwGEgN@+xKA;3*2AZvb)}U(9rA4d@eUz@Lx^N|)2ZTp_PDPBk^+PoT{w zYa6FHJ(%e#WG84OVY1^-WGYom>xIe)eMCFBu3yb)wdfnF75!PwX`u;@KY?uQDrF`r z>+#Aui-_g2hq4)G*mUIzql97??h>wRAxq;+1wQj$+jWsJworxD?2KfhwhMo0h-EPh zdy5!RVQUb4t5?n3D^Q)963Rm1BxMp)$62+{rz&a8ZoEjM=zhnCFyLfQ9 
z!Vvi#e~904;41T^o$3qGC2WXit8;_^BOC$^u?YhlA?Z-wB$t^b0M3xqSjyl)&6NVU zLQa7}@;hFT{EpF<$&HjM@Pg=dKIJMQvEf3(sumJ)+%oM#!m^f-1|)nqYK$PdtSwBN zOjo%yOb}MJ3|_W`16Ud;-NUk$;o;jH)e$lrtZ9b2a)MB^oP`>eeCA}fbRNr@&coBq z(vIR#VZefhfpOS`gar*r)AdS@sO@4&S1s!;LTbsuwOy?6sh0Joh*~mI+r@e(V3n_) zmK;^v#Y*q7thbD(Nt;>x(hE*FO%RrU6RC702i10o8_}R?1=Etz+Ah{Uv9w$@*DuDa zu*~IcF>11fV$mO?dsx#FUh~EvW<48XUyOcWG1CtcJ}ZS{ZM9u2@>!SwGV(qnE*m6e=o(btl zQ&NIwvEj8{Sa@X(p5?eV)OFRmm0hcw!87?CWACkg55@-8c46^9QYHtAkfc3fKU38Q|>2#JzMdY6408H1FJ9m)~# zWhX84I4#7ZkQ(c^ni})mLM>(|OB?IAI6a8GN7>Qu^_yiD<4>?Xz-haM(vJE@Wh8fL zGg58+CasqAbmT?SLN{t5f@f94VrinXzCl?dcuO44Q#Ls5qBBMED#dk5p`w@InN)PG zQkdYGw0MKlM|=pT#0H0utXC2fJd-!L#_479uU2O%+jXX5f@c*rIHb(g%`$_O;wnQy z@T`;@sa>;HE5aw{&_BU5VZ6o=A%k-S&!jc0m5BBh%jL`%g;pt4LgNtd8>SFJ5gAw6 z;3!lEYJa`cj9ZlyNF!vJ>MM1a*x(ShD@=hLlds4IhYZ3M+Mn1>oS-b{hANZGm5CEP z6Qbox)D*_PjcjmS_O=1FjcjlbfI4cVY$JOH#G;%Hj*V<^5Xybq4Gw9$+AFOM4uLRO zcz+|L+2Gj71_w6F(Ka}wN~{&+EQdz=McClLSP==FjKBv&1#v1V+H;sP{s`TMsbV%b zB9z23F(rKwh8V-dk<#JE({Qsm?tWs3w)y{TWu2v3k9gT!4YAD z!x$^JKI|3gIyg#rp*b!_C{59}W;Mu@`a0=nesrys^mU?$; zL+X;$8L1Ou&&B5w1^ACtS88DL)#TI3Zzn&O{CM*AV|lku;{cgF9Izc+q;d}aJxt8V?fwbwdm zJ>wneO<1pU9$}GprgtjeHvEY9cJC(dRlcFVnD;gBAH2Wte%Jez=vQLzjrGKa#&!@B zI6roGs1WKOyDk=ud_B5=lL|iyJ{!F-+8Oy|{t{~sgji<*s&DBoX?Gt-{&ezEMIs`5FX59 zc%XgQ*~(WJN)cl{Bbm#Yaq-$-rXdTRi}wgid1evuoP~dWiafJuc+MgFD_!HlwY{tr z-nq~%tgz*&U0o1)MGSeCk-8v->|`OpDNzD!Lg9r+BD_kOQocu;i$g*WU`^v4RfIXR z0p{iD!VSS7+&FT;%S;k(MjWsrD^ukWaR77G=HsHZ-3TK`*lrqJ+f7wR6Oo%BM2#cp z3WZ|71ZD#oTY8!6kHu^@?@wT>qK*xLFM-xa7t?5Ct0u5j(ZebUNz9*R$GkK^xS)#R zLNX_^yJahF35*NX#+GBNGDCK$GBz8%@`zfc`6>q!*sAE%qsl=cMZZF-axj4{igGO* zTa_^D8*Eki9ZmaIzXuc8s_5O89K_|WaOOXoz)Dl?+uZ0C1YOFt0&G=WM9=-nQ);`e zP%&uzJ@$FU;Xn&B$_-0Zd?MD_Ft63zoxjErwn zqP81zNlabrLxLd!U@VDEN+7w?<=Ygg?Z#R%_MsHl5ygTP`*2gLwwoap%SEmrmot-< zfwA=DZb7-nw~2-^yHu(au;MDplKBP8>U3qnyb>0fG9sWbNgK($5~G%VlBxP^Y&>Js zZVJ8Pi8tb0w194)xSC^`}bH69P5Gq_j-WEj#iRY%N}XDI#FG!l@o-z+fe7Sk`sEi zShQUtKZ&Yxc#$~JP+>4t85k8JBvX{6sX@k#sO_Enc2U)FgKK*y9X+Z#E>hb&QN3i& 
zyNnxE+dJXyqpH$of|+zwLzM_J&V^Ju#to|N9V_Okp&_M%GcYG&#y&E-VSM&$3)dbQ zrF~IKwlRn!^|kOP*!0d;{A~~uwG^Mdoa-;d!<9Jem0yN6*!bGwwMT|(4~Y$147Ih@ z9vRZ`<};`q!cYT*f4lN0-pWTN@`W~m1}oI`=G$VmM+WIMQEy%z`|G1xv2E?ON7~-P ziWSm+rlffDZ9{91qynD2TCv=8?Hg~EUz2b3Yp`u#?U97rf+b}K7a6*;ZOPgr@kUP* zZ7l`l)%{Yd6XQfhKjM=~ON_JFg$JN((ko?TLyyQ*wJeL{5lgi;8+!TNG^N335E=|Y z;#not?6)cnA+LOKhOTxz2{_A6Ql(f`5@LU9An5@?u@`U{X^h#>lV{9po=tpCtPtW< zRD)Gd6udkK9tgfq`{sJ^(0`tKf*Sz$r0z_;i#q`CNG(jwNKHu%O+}Ocm3*GB13sR7 zIC)QUTXJ*q+T`WQGn2EDQ}gQJnitK0{8ImFuVFMgr5oT zf?J`(&i_W=GScth|? z-N(-aM+BpR1A(6fz8v^);N5`@fo0qRI5Ch3j0i+I5%F995B!h%AK*lU?ceBM=|9gu z-(U8R^|$#wzCZbXL2RV%yU+Jw-+O)6`&RhQ_09EFeB*oreF5*w-hJMuykGTx*85@a zd%V|sFC)faSua{Y=Wf6!tXoy*Sl|4APxTyY(f_UoQubEdE?rlplI+8J9*WyralB+H z%HDuJ<96|^2WveKCG4$uUb1${kX)Xk0Dcz$DP$^zF3&>|dn=BY`hHp*S5a3szMtnI zYQ$5N8l8=B&qGOjE54#i9&Rh-CU_p|XK%$}#23JVU_-W2Sg4F}8eznduu?{=#dB2$ zg7a@VEo2;59Eco_Yh|Lc#&H!xgAh)M1KIL=_#MKAs}7@-0=EMS@k}^as}wjLP#}9s z^R&hIGTKKKGTT(=C<*QiB)T)JOe7j0Auv4T+$=>9SW+<`m@nn#YMGXcG)O6M=Y@j3 zfqZ79R)pO|MFgoh98nprMC#G2Q_B9V5~)is?d2?cag0L7nZ=fn(+tBD!kLve%ich) zG*J5sZ&qb4xj{$hFyYHGOqCO~faAqVw)<>$w)Q7pEQ7#h7d+cseyB3Rk%ft|D3l0i z7NVv};dyAJy%le^`j$?q=b?6cE57Z~oKnw2QF|+Hu7sFnj}^2b?s47RtNuI-iUkE@oiUq0Gg5QtcJblO`M20XLPjRGzdTzB1d zT{rLn*!6}&uM{M#J7V^h)5O&AE}1_kuCH{ej(2W}R!EGH4su>OX5G5wd40x5;F&}Z<;lt(H$kKxVLRfD)}1UFyaJYt1hXp( zl|N2_;h)h*JOcBWQwC#{0jIi%woTLqbZP@i<*ZhiZZ=?)4t9qQHX$Ur88$>o@Fbiz z&eIz3C8&X{UvuJZgY+4n!ufrqwg8`k ziOljZcNHtc6~dzs$TX^5y8(|v#Isy48lx{dNMBTfxtYo^Ej7^1XrR`0T(hnb+7|=N zU}R6wU)2ZkO3P;}1Dp_TNueM&CA8XpMxe|01Ls?J4zRc2eRPU2&1_P%lFYSEgn&X@z21rvr6ZcOqM(Du}YD$;0MF<{|9alCU=; z2I+(a=geO+dy!{L)ZUB`WOC(l{XAQe_GaWBjbI^@Atk8=5OEYJasWB!_iSmiHzVXY zkenkI2RjhiYJcT|uv0ExeL~LZlTSG;P!0%=Y$fE`GQi%9D53*6tHXgMNm5rFpo|em za?+Uu{2XN}WZOJj`rDi5h`Z<#nGw&H>Go!X&o!+JUm~BEU$U}m*`-|@uIygEW?A=T zh`#P?S1!M7n6HT=f3^&=HzV2{>Fat0&z7O~ zW<*++1vQ79NqBrDXyqUx$)vLjJa_TyH2w9=b<37tGkZyXP9`^>tT3#mlRS4Z#t1*| z=8{#pNF2y>SGT%SFOKss>s?)S9D#roR00j>WXVFIm>fb+}=zJ 
zS%PQGUoz`V&s~fYGP191)~b+{OeaS+E8<8e_5cdGNy-UfWQ0Lxyyvc%z1c{kd?`1@ za~Bg9X(X#wTHEEhiwSFlQ6bl@rCY)%ug?f0xDi>(P1gF5L?R8fz6r_@;iC*C$F2fM zpGFNiEra-JmYJwrjlhxJno&xDz=1-JCgt-(lmdC9Z;>m^QwoF)6wGc89nDb+q>Z$U z6QqNc0$Bq^nQeZqiYbDqk$Us8Ycp5NAZ)ZuzA#8B5I2T``@17OcQI=mfy1ZFm?B2l&;?z&>@a17uu;jC=WKCqARU9JbH8tq2T%9EIQ#!o z|2O>)`#;4AfLl2MaK8U!f63qJ`=Req-)DVWeCvFRQh!SQF!hbp7g8Tj)!3oGEOl0D zekz|Dml~W3vOE8Kz9R7LWIg#oPXBL6UYcB-9G@JPOeH;umlHoqd^7Q2VtZl>yYp8k zE=ep&%t;gz;}Sy?vH0upgYjp`C-BwyXX78^3xw~Aua7T}pGgFv6rUI$8jq7(;E%Ci z#=cK>ft|6B#_ZTeA_#M1Q)9zo-ss-wJwy&Z5ZxHPG`cu?Vl)%&hz^Rzq8@SyJQMj= zH-SfWJq&*Kpz-oRgf}rQYLH0J*cEcX$ zYqRX1b2>Wfc`#*fV~wl&FgK+Y!Z0vbrMa+*q<`pCdt3DH7v1;Z*KLbvt&Qlu_^aE( zK(0*%4$Gq!PZ-pfG9#C(<3~# z-mE<2YC|DcQAd8GaqOl``Fk<&5z2Zb!df)azBjARIHO85Sx)X=x?vr6B658EAybSg zMdocNg4Uuz_PyP$ah23oY+^93)}oYsFTQ~pSMm)m=j`rbQEuD@u~K`6aRkW zYAvQ#)HJm=#?@Nf&%PHg!ac6mV!EQCyK~v&t;I?EUUWB#hFg(zKv5Z?zG0*hZcW+^{Y8)Li* zy)B^xMz70RnR1WvFnV21@$vyVYjMQB7srACO?A3910_#0^u|QKI7wNe=NW1aJmjY8 zv(fT;;6bZ!TnetNl%^v<9?1H59tFxbY?Ql!n%bdY2lVQySVvqv7SW zND=Q%`W=}d@r@GK7C5=EP#GZx#4eC86KS<9eyYXRV5?sXq2DP)q>dwA`NBkHjfPh% zfXw}stg^wGQ1gN-@Jb;9Lt4)nuF?>tz>$E0U`uA6w%Dw9q!(68d5)5>-XYOQoibsq z!-O+l81}OiF{_<7C>L?)WZtmawG3%Wl!B!$P2*N#wlY#HGHV^1w#u&Ha3x}`3y~Zj z$+asHi=F zoS+3*?NWe{CYM%cOMl*TllCXaNAjhva%QM9xludaMCz32hBx?Yi$~h`T>o~FI%{#e zeb04AkJMR4*5c9jJsaLWQm1WSZ@R>}SE0}~E~wIBEgo#&vkq}X#|f2d z571g1vbWP)DJzl<^C!I|fA-@~?Vm{2%bzL>BUxX}-cGL!wj#{cay68&x6>iRtVn@4 zFw<}V038Crw};A{hi(JleE@J(CGl%upuL?}nrubh7=laM+iCO&D{|tJ(~6bCwYf#q zOPgs;raymPW&TW#wzsbhcus^tE2P#a(uk>*?CopB-HXVvF(;V!cFv++)TYv6g66H( z=AE@><+WX-&slj5VLEcr#%bQFKFy0AXK%k!BzVM$lp*=y(3OMooqZ^S9rpGW(qwCP ztI0%`xMa+2a{eVx{{nB^-hPF2t#t{PwQ?tU>AA~g@>`B4<)!bJ%M#*X8bL1k{WATX z3(sA}a?wk@mr7T2g^1B%7JH-i_DduPBgi!QeVOw+J`DGb<@bxt@A<+E;eV0xuW%)$ zDnGuXQFx~Oc%f4{AHelS?CncsnzD`FBQ3c=TS7#H?aGS$dcOWz%ubhI&(mMoNG5&F zBw=-K^F78(J?EHuSoTu%Y*RGfEtF?9Df9ApratD_ubeE5m$(>D5KfC5oHFvXsPU8& z;xk;t6NTC7P2QvA_l4#i*mBL5hsf_lL7d*6=MCH2iGgrfG(SgvBJ|<>G*}*qeeg)^ 
zE;maa34S<_bLH`*Ch;J7obUXauX5Jd-afBcZ8bMsfVmF9(THlh6qut0(4^%t@;KXJ zS1J$lw%OZfi4)Gb&t`yB-uy&;^SKL8JkvYe-hP4}&}w;DSP`& zN^%0TT=m>9Kg}>dO_9H+o4==e$J^VdNz}u-tiadu_}qAf&n8}iOUp-pGFV|G*}a_5 zQF-}jd7rAjj}zmPSy<6osIjX|H7!WlLF6M9xei$hC0z=0HGn3sP}Em&%1o8T*<6|@ zdQgK%{Hkw@9Nm0@@hG!;|s;vGS4vKn81bH3Gy4(ni;ap2q}wc#>(SDepj(L zkl(jqzeu>WlI`btFk)}RdO>yMlAZ@Kez99b@A73;zTHj0?7^tL4bz2aSUJ<*^B|>1 zHcI(0%xD@d` zh@m}1+V0gO68bz)pEaKEd2oom4GW~3fXth6Iw|o0Q+PI~BxoBx)R(O%EdL);fAas* z|1JL)$a!b`Z|3IzQvXT*s(+F{#l8RE`S!5OeYfuh-*VrXoD|rb`Vyz~KbZRG)YYkr zQm3Y-r6#4?i2%Hk+@Jhm^0DNDoY1c&Z{&pj*~vM{Y_fy<|Gvbb#JjmL^U~R1)LK0^sG8{;%TSiQg1oK{mnZ@u9KTV!w-h*W4k9y*qYQ?4sBivDvZi z*l0a5@E_4f@%nF%-W|vN>{1WLacUWM-r*GCUIJ zoWKEY|L+Mu82)(J#_L~8?!eQ+J>jY0q2Wa6&!OLko(Vk>s)s%u+7fzKXl>}i&}pG+ zXk73o!EbQ$e;cME1rBpp;fcVmz-Iy<349=MV_jcUkvYTdi+dpSJD_ zCcICIOPeAR@U{P~Z=0|mM_XYYR&blL6RmAA`*D;NQ&O4!QgM7CWK z6qPiEBV5JO7)8)kAebGj*kQ^DeI*thm$4M;S&rv> zUhik`z_M}|@w`6L-m&rRj7HDv?e>nFj&3x1UXR*4ZZt2FJI(X@XnV&EZ*MeuUQgLO zFj9H7=Ju{YSfx#f?cLW0+dDAc8b%|ZNNF4#^SmCj??v%2pE%jn&sxNzsvKUqnzR<- z!Ju<^#Zr#1^pZ}-uRbh#Mn(irmyh}Mw-!b1dr>#CYmoOUkDdlgpS1|55#>Wyi=51d ztVR9pd(l8_43PGY1S&<23J%b!&_z@w7k3Gx7TS~&rPFr{!CHh1g63iJsid?P4YBV< zMI2Q!T8o%$fA#@>ufO{@>?A9AG2K8!$EAT6lF+SYDAjt#n z*%M_A?w)8AL7qOlAY$*HplT$Wv@Ymp?;dYngX5hAuBiB}3!?V!ai)7p{gr&IA@^Dr zz-ml`mCw2$VedvkIeo+W( z_U=)xo|0#D5j|)2h%Xpo??w+Dz2}73QAnawtP4iiyW1NPF1Fn=Z#aCX1cTq2AZT4M z$lg6j3~jR|t+@hm(g>AYkg|89?980i`_2mCJexzWh*;OH*B-z7E5RVX?~4mFi3`N`5oxeX;f zNIXD7CG@+w3MHWuqM`-%x`h#f3`Sgd&Stb)q!`t*^+@s)Ewn+We2#bpgOi`Atk)}R z76u+!R@q!**hscOrNA2%{UD)|$`GZvS}F8ErM4I+Ra%@OxmY1LM@ev2X^Y(XE+TcC1Aufw@{ z63SZIZcEA(BTTIS~}X6>?u8nt+z_@s+FGauC&8iI@p$+Do4?J z*3y{0bDmLAxlBLLYyIq(ptD=3ajD&+6a4Ru_c4K?cDc-v_}YVvC6uUq1U1N5ClE1 z4YGHlOfcXDi-oguiq_f? 
z(qee+oDAgNr+A%So1`6~LfZ2>{fv&LpG8Px$SNb$G|lI}A6F5`QT;mOh_aRy!WL|0 zn$n=Dg@#o@l97_qpsr=C$z;Sng>Xh?!&!uMK3`Q5qqVt$3`J ztpe39ufn%R^HUT+w@aO5r6_f24d`=fD0FLal)1K0PM%6Hqq})=J$YIzPS!S}t+fXw z3N=Cd9WBj#Ce2(v%Q^Uk$_TdABu|M9R(~yoke20=o2aZsx5G$s zI&!K^daR#eBUvew0@VzK2xbR#;{wI@J;eOU~TmHhWNtXSCSVePfs3? zzdkzIM!vp3;jcfLcr@{V4Q&qaR}-4(qrT8rKoJtsOZIw?9N^52pF#9RMTqNe%Aj9Yh&<3 zd;wrh@VwN))NJzi--AcroxD5wKyYY~qmijDz6tR2;)<;u$FKhY|~<1?H(pK+GyPSWo{uAl6q4%t9$3cAOimr?&%M?*g^)0?OR( z(@XCL#Qbc5!t-uGOwbmXr=54>>tUp^EJ+c{h3ghXnA6&m(9AY#b@i`{HN*9CkDOQcDe zrL~;}KrEIfn90FY!0Anp;w32afLI7Eo<%4H#4KomRVcdvu^pN`X?(AO7%zP(JE6q9 zX;Ct!odV8xgG{r|;eaQ(LHb|7Lx@@_ZsxNL$}m9WZ4*ST)fGhKHY>^q<&*GSu7|Lv>JHyQH_!A&bMH7^PQxKn_SzM=?jOGRx zn3G8^AZ|gEQY)GPh-c6Og;GE!RTEU669Ji6O;8uOfOr4`=DTTUwnRIh2gD69V5Zug zW8f5gX9JWsVmLZ}0VTeGyLeqejQAGFv`G(v5@Ws(PX#Y-fWlL{?l>qfa)H@0ZzSMS z1L|n%8=Vb^)h#?5U_CA1*=|tg{AGY=xk27Y!87}Uz3B^~T+$b;E6A3PyP{qdZVO!? zW7M0zh~lTYK@o!jp4u1eP0s{8#SMyl5Kzqg#&Bqdu=DcLY&R%81w6rBTsnCg;PGxS z3#EW!<2QOqr%e?QBi{|y(*jO&fvk?+^tpgl7pT)lK(u+YxKhps%)3G9NffT%JsG`sy#0rP|D#0VqxUB4-?55KMSSLF z)YseX-(fc$t)fw1PuRa<=pn}5%XmIH^1c!FZy5HX*L(gA+?`l{)%vyTRxTU8UNfPu z^lTq_A0;q&4STYMkF?wE-(c<@jr~YFVedb;LCi~!T-k2#Kj$roS0?OVvjpKRsWd&G z9Px#L_ODrfW4+$f*WGaAveDOYg!tNJp6w&Pkg%U+VM!Y_Dg21F?e?=Qs-v}G#M*@Y z3$(OpFt0fLL+$o2(B(%X9{!<({frpfjokFZBklGxSldS>jwI}#G4&wM5gPG)a@a?Q z+dpI4<@vG=AIw>E#SN?0uDpyd+pHrOJbgFpqY3-Rmp8c6cSHZwZvPm+|7hHY{t05t z+h)6mJ_a$??NNyzOV~d^EJ#zhG7aa+qwF6b5~?!0)-7AT{8G+Uk6yo=@5XOfw|vz~ zdU5FFg#8q2j4~!k{E%nc?Wb5@N2_4Svk*61L#F#7wC1U|Xw8tvBKDp!#vu<|Tl?92 zI#~@E#$|HuRdR$`w?cr|h2_jeBq12B1n11UbonZr??p<$EE58+wKZ<Vo zv27h>@4>@rc_HE5W#F zdK6o`?LF8v7xXzG-(b<{I>BMEY%!|+f;m9HoI8yGE}!n;<7}L9$Stt8(PYHQL}>-D z$R}{Qe^aUEPm?IJ6;<)O^A+9m!rCE1R?1J;3b3ZBfN()3KS>!}s|@7D#_P7> z1`T-Sszh*QLvOv(OU{LSm$JJ?*=5V!S{hr3=1J+il4Ax*`-zWbCM$52sgHvqnF-S0 zFClZ>DyyFU5FnDN391ek@Ip5zn)!o(?{I-?fB+scW7u+}(mw-a425UI60a-BFt#cZ z2jx;I8Os(Z>RLdC(O^3?Q9sx^)%p1**Ail0CR3o8ZRKOOsgXC11vy#y#>;E1yO!2P>k-EW&i)e@NYe- 
z1F0Y3<9;snvD6*x>R*?-GIegMhh6=_$-~LtCZ9;&m)w$kS8`4Ag5*N-`AU;G#8(nK691NXZ{pg-<%u&n)1OO>OeEs}9sgbYr}4+*56AC|ZzD#qf%E)}<8$Nr zcskxcZgHCbSF!JCK7g&U&9SRv?}#O%e~!Ko{g3G9+11~~N&Y3#+0pLkFn0A{CZGRL zB43YukqiJga*qGZ$n?l0vg-xHuhR5m{~harV?A)J2aff?u^u?q1IK#c%{{7mo?o83oWl=%_i{OI!?H_q<#vVkex|t3Q%}?mo~>^pyKLozCgNxM%Fx%4dOh68wz-&v`htv)di|fXjdh>Zy zAtojVv0k+D)Q8m$5{}F3t}~mjZ@4@sKcDyOOno1ag=_Y$WnkgrYK%_z-u zGtzR&WrlJlvEtt#PWUQNB$>VrXsSlx7b~G9>Bu$vSh{&?QxM zv9wSbO;Seqw&jdgJ5j4Gvh2$_Ei^$3$rUT+5nIyo`s0-~2Q~Q8O;*{AQ#P6)s{kY`u4c92jO(w;9tdv=Yk z9ZbFbAt7x$K`rSL=Zcj=#I>3br)75yt{p_L9Oc}QP`@x&Uc%y{5}!)sZqwj!?FEDZ z7fQ0>!I9bvNCCM`Q|KoTtYm%g$^)xdAAE!3wHI{#a_NTqeWAf{W%`wHH{injw`c7(B z>eH!hseekXNG;IJf3GH=OMYMT{%zKi0l8#Meg1mlQ;GK{HYTnl=if<*0^ji)#0~zZ zMfxo|l@A%oVo5=upVQe88^}Ayo zv3@a&9Du)#{*;^i_2~W44@KV>y(zkyjQXcXdx!yyiY6m}i@X$hiWtECoE6v*xg^pP znH2sT4L|nZu^u?q1IK#cSPvZQfnz=J57`6d+6%ZJtNGN)|Kjd9Mp-d2czo>zoE7f1 zE?jmk*}+a*e%Z=P`FHX1YnCrtcj=1J3)Wn-e&zZN>zBvH(87svS#+_%9kmzmV@|}Z zYTfc~*n5qO=~7O%a$1&PaPYv|3%ECpFQn)`EUoFv4Q{W!fM>&3T8h&+H%qQB_G01g*b_V%U2tVT9 z@FO2t=@HhrHo_Xuf!{0b7{)j^7{=w?bZIDljWm?xGNsBSVTKRmyvTTAhO+`QI;BgB z;-^rwROuE2{ArLlX#*q!?kdD zkCNZE%C`%D0#NX$!%KxR zLZDmPj3(EP3d4klx4{Evtujz5v6@X7A^pmRHvL-7oB+G$*~&J}EtF%aCG^!R<7z( zz<9n~I$XH3PO8Iul{lPyH4^4+O3BRWd*9KsgLh`e;M&h$ zJbOjHvdFVzLhX4b2|>n&G>{~UYc3^=;+j>fyVk5+(RIbzYrC&lcWJ`2qqFurQ-*xF z&RLSh%P(KPj`WDj`y@x)5ve`Tv?17&4SRMlq)ZWs^<9y#9Ov1Qt3A(*SoXF&MxV4{ z^|CdiXKh%y>ax-K${C&=!)gbZDg^zgdDgNuSB`ccW06V@Fi)B>!X1Na2bd63n+aYO= zl!Fvf{Kr?uNNRMFXD4l0L7O=MAn_OOFM4*;mdmv*Sx$Kz@7c*~U8b*aeDb5o_a|>6PrwDqlam>49e5HiCZ6U*!Men`i3N#iiQ!}i{0nCZ ze&n1b*b?6qUl+eDemb#*Zej~z?i@T5do*@`?ESGDVwaI4peL4zjf%yiuSE}V=io8U z6MQ`Se&PvNM$e6&M5cgo(c{Pr_(w7Yd?&Ko`KG{ioGLgwGLQQQ<08jJeBs04ePO+e z{|{+U9ee3xJ#efC{xN$XR@;vih#gswJRvg^wf!h~tVT9FNM18!CchgK5F0T+Pku+? 
z^E>K=L#}i9eII5Z<YYa7CS1lLwvuCDi`_`$=Sz^bF zd{LR9GyB!{VK9uctod^n&OUw4+-!CJ%=X$oj0POI)0VB-fFrqV&6>6AHTU$mnQgUw zYsC@~>o!mF=c&2eV&S`{uPr-sU~S)OIr3^i>~0*>m_0;jg?ld(pDJ;)wCcO z9b)BkrGILs&@ppRZQnaJ(hri6N*W0~W=3lJus|@@`67gLI&KaYZLZ9Q@*aM}5*3>+ z>Dj}3nf;fdSfE_}^?UY2Yx}TF*+I#b`+N3K8jIAF_IdU&Ab33(wz8j0exp78Yy0qf z^pZ1Mh>~}rFhKd@{HPtvt;S5H*rphEELdwlc@G1lZjf`$S)SFiarTq<45{tIPCfd5 z@*XUu^JQ|#24|j3Bx>@UtF6PW$&edVjH|QNxFS=2wx9${41sJH_o%hCNO`bBN^<3j zXJ@o_;0*Kn`RV%mX{HVwr5tx|h2<%RC05;3eO{o?!n-v;wNh(W@sb0Sj?BtCU|!Cs~upoh`!{m zDY;XN<%P*O2 zrBW&6i?g*qSsDxiUpmF{E>wnkcDB_HupICW9YIkj%vVA)^P5hrXXlXG0sJ>+_M4@L zXD5yft{jUhTg73SDjSoFJv#^04zL(FAO8knp^d{&lXu2FLK1YS6M}5l{YrtdP2r%; zLC*I%mLRW(s8L2roS8 zo_K9Pvr!~>nJ=OF8>UKztn;Vr* zVC66ph6KMiAM0Vj2*|0v$W$fKu)MS`D2FWAGpx3s^;ASrmZy>8vxrD6PDNVCAEgE8 zm;J=$*hy~sv4lFfUw)EU0<5~hUaOMhf6x75DG;NhNAy&S{LUJrdx>6k@hxza*M35F zGzP6r68lmamAs!dX+lWl%rv1GVkn^Jo^DAfSfWsnu%(ipAxsA=Q%R9cuto@U!P5ME zxiV84fuf@kBv{B6M>JVV z5b~1}#B)U$O%xKO0we`){*Wig?EinsgU9}C>iem$q&}OvJN3TQO`HHYI~7VEA)nv3 zlb=bxCwW8iLQei4uP6V#oc#X{-~Ib$;tPp;6CdOpz{2*(e^e-M9+EdC$X zQvfUDXUAv9v+_u{Fw=QjTRGMf2bqzYnTIAD$A z$M?4JgT)ugSnWv029>y0s_&frcyL>Mu zYR_FOhM;6L%Vno24e>B4V@HXP#a2s7vtDTm95^n`P|nvVXQC@bwsnE^q%h80>vp<9bz;k1y;9$Mq&rbh0b z6C9YVZCvj3Am<fg=93#R);7ByLJq)rk&IlFLnBegV(vz93{Cx zNi?IGGBI&5zNK3&31mhTdb9`KhPF132V8!?1$l7zKyxs9} z-;3?F=T1KQ@p0da(b{thOwc2Dn(xKYwdYQH`{U!56F%4vn>xu8mi@5RBj z=jJ-$7{1WqO13Oj?qx9&GAVLEd@N6_L!Q{{O^!{~_Ob}^>bUU31j=RoV-vN#tU}z_ zDv1a3#r|U>wY{uDiI?QPQk*4dnEA%WYkOIL;sHt`(a_ko+Fn*4fdX!bz)wCpFhJ^H z6-u;(bLO1w&$kI3s}MR@XlHD!wpW*;rFVbiQJ0npf{bmi?PX;V4S(~^A0cIl;XCWa zpRprqds$yq%Q7EWy6TM|SP}vj+glO@#}2IRW%V@)bItPAm#orn0k2$h`Py#6@dRz; z9}|`D?i+XOTCslhsdmgk_dAtX_Po;j6I!HeLi+vv;&;Mn<(|!AV-%A`$hNHKIzYzLL z$itTbKkk2@e?xq1d|GOCa*yQAi~Tv?FOUn2h`*YMCr89q1j7EqY+_nX#F%C1l&{@=o*~XZ_9l<>>FD zfAikwz0-TMZ<=qcZ&&O<^ilHg<$Ry=U!447_+$Q4_>y6VKOQ?SHqqD5y3O~h|5wp% z(H;IL0!w3e#_kKhH?%S^KjI>`%JP^7iv?Xvi zryd>)?u@oarzW0A{3`gt@EPIhWGHM4{l!|9@`s)e{eT+~yZCOwQ}OS`_eFmkUQaH- 
z4+am0cemMxvn;AcMhf;G>ghiN60>nY^0<{r)sO%j&;(1}+yJYmzel17R*X=ZL=lpI z>Qq2xy(uoKyU6&K9t?=UYVnjz4}Swh!Z{#3C3}Q`vd*Ry#-1(4gp!RO=mJm6tMg{HVPNqRm77-)jp zOLZBJLrCLRMN&Klh){0vly*wviBCE_tD4fWV;>+v%2shQsVV5x-bk9(o4x|dfo`7i zngU`4x+|)u&jY;H&69W6CJ{evQgW84H+?yj1W}uyCJqBUOyIE9B(jEd90tVvbyN1H zOMqCuEl`r92ro85%LIadJPvbid0p?wxB)(Lz%MD69 z2SPd31?t)aJltkLvt(q3ECh_ZKuz~TlMc;rgOZE$M!@MVQ1c7|9)44Yi~uA&@4S(w zee4Zf4MTWT5efuvc2cA?qjuh{e(ZCCj0J zvl_KaMY0SJ0-o3aC3m6pr+_CkK%tc3_$%P?Zji1M@bF|e$U^8KqtD@SE-)hlC-QTw z8x#p9;F~6xbX~I|exdaHMj61XLurrgDQZi9?%3N~hyrHB&1w2%CS44kGMTc7B4o?AT z{Nd*GZp``L0=lQSRMa70#{EVz?j3;LE|5uESFpJd8a#gvrF$Xhk`Zu9qn!+~%H*FB zqTri0&HtcrmVoIWmR09aKu1IyQ0kRcd5fztniuL0KsK711EHhy5x`}xdR4cI9=zB^ zsVNt42fU~+SWgRx&m}xH|E4zkWkAftCa9ezAih_#cI9aUV(&FU6+QxD&^3!I&ldqP z>YAXmvqM1ax@K{G&07F5?%ZHKEg<$?3lxcT0pJoBs6!?ohF-HG%@g_;z(p=lC_4ZT zyZe)mjrOKV({*^fi)T&*+BJX^+@RDBcnHhZR3z$Nz^4JLeJShdj{;6}fhrFL9N_|W zSuvW2+52j}rbzfzfNYVqK&c4u(Dep1D+rm;ISfiioSL~rahYI;@OzrYb&TE#h->5q zd((i2eJ)U&v;xrY0=4mghwyir#Z|X12PD|g0!8Hsm~(+TX+I5!2OyNDonq$Eb%zr! 
zO09hvVA2iBy9?;Z1fz7sT38C`NF-xdNb*p?L)W<~64fldbf_s~MIOo!KL*%bzYQ@Z zU{l5dqDJfKM**ACSSZB;5%Bb8MGDr_&j6m$1gS_GFW@On(Bb(Nz*C!`k;ZAj!$mhJ zulZ%bQWJE*-$r;sG~7(QX1Bc8oWg}M@KEcGGy`8fEw*&)jcB~|l7Nm}Y7SiI zB)}V*wL>Y&QQCR40nO@=(L|vfcGjjTE~@#@P_AqiH$C531$<`{geNmuz{7ncn7k44 z^H6JkGUsL1Jpt$lRjr6=QwP+pT+3J5wJFzc>6blTkl4n>z%}Ec?j^h(Wv?AAKotg`;#Y|$I8E?-q6tqY9>$M;n9 z_REeN!yqy3upQH{13Hd`0YxwY9(GIt1tmdcX9&tSO6tx4U`tXnYsJw8JiOdhJ5#yd z=>xpn4T_g8;Q4M)L>O-N;g;%h+9{x8$LI{x5W_bC9Xm!r%%FN&K(}?G>TnQXAL~Ta zp@2>0;dmEmiH|hZjDR9;1az!+Q;`Uf*8m->-84y7mw=9%F>*~7(v^UYLNcK66!2vC zy9?zH0T;MIF;xX@ssyK<9sdQ`QXY;MB;dS8antzTj{gRn>jI^h6zn5FMKHYtWgk&1 z;zmHn2Q!TqadR5u-WnsuUHbDmz}9FwDnY=ed*HZ6>BE49Mw4VJN@z(y$JR8REIfY+ z*qVxtf)~)SjauXWb3n&7Y79qjTEIT$niwKK0dzYF;-w3C*zqdOYs$OxnunWSr5K>} zQYU`hVMoxLg!&S5`8D81^G1y!?oH1Eys0l(SI|*%N~wun?}gIQD6QHBytdIxjHc9{ z?hm-Z1&ZgO;Q9tA?UcFnRVc4%f=10K=vbx38{yqMegmarztVLY%cv{3s!?2OmtGnQ z<&`c_BNPHQd){eh$E$$NF_Nh$ZlQpenBwN$McOO^bo?vx?y`;qbo?vRPLT`)03H9T z0oE04`d7>)&8FK6<%NxQN{5JK5O8S&lqQK;DI(|s7szYY6}-9u%KQ|IM;6z*CJ0Yy zl7KB;?o=eN*+-YlzyZD-a23~qdj&Aq1gS_U1?;m7#k`mf_{IgRU>^-Fjo%Ap%RbP? 
z_bS-zx`y!o8kEgWHm}*67SM7xNxJTffL<3U(JloeE>L^v3s6RzARQtWxqwX(Btk$Y z?L&Y~VbvHoz{9t?it7aX9F!gxs8dnEkO3QWsh-{m7fC1?!z-0Nq0@`bfaTO%GgNQxx9}z^1S6bh3aiJJQAoE1J|f1kjPR z2Beb}?4uEoik&Qkm;2}rF%|`E&Sa;PJ7a*&Id8yT1^cv9ct)XYwUdEky(>7`^b$={ zkzVhNKsluenjseOuv>a-2n?Iy@ERATnEq;q9Nz@#Pnq5`;pvvNN-3b*nUPi8IT>5$ zmN(iv|M#A0(!^%t&3f;g4>+kW*el>+cjI+3%mH*yFtw}%e3J`(0$?lp;7mopmmMRj zAu4;*A{iWW%Yb5C2-ru`ieMQB*hkXJG8E8}pN-30D-^3#AqDPYqE6FJZ7==eQg z(*`qOT|vhNYZUKIuZ7aF!3-#JUO=}EhAY*ZmI>C!1``n^pksqIiua}$06I2U1FS3P zw!t)>vKUIY4JPraw1AFf)oLeR)60%!WkBgq0UfWYQBiOD-vAx|%77wR1f15G45DSQ z6*~?9RvVzitR*-upc79qh6Gkn#~%U94N9Y=>I&uzs1>QrFdE9N3)H;|0W&U8Dc=Lw z?FQxD1?+Nx8g0B8aB5$$o)&O|0ZluROC91kjBkMQM(|W{jJvp~i8G+=bc51M0;XM{ zwzCY_;Q}>2CE)13U~hU9;3zjJb4kFFeZk)J6u=RE!McLO4QM1S!awkSD2KU0=DdPK z8=y>YQ4{S@4sn61&ID|CgHU#eDj)0yd3OZ|8L%;JdebAJ9OwddehPS;3)J}OJiq~N zP~Kg@{%%kdpLl$2Zcr!%Ou9j#{8zw)8}~$3ikMC zn!R}I_ufBy56E7-c^9<_eV z>4O(JuW+}-5rY2Ukf<-(&S`?vqf?`^IM47EA`y=z4kv!aNrXe*FMFS2|Nmh*6Cm-0 z&<|2woC260-4T7X5i;m5*V8`*Y+y&AC3(ny-Pe5nk0UARv4fwLtb*4$O@Xi8sI@vT{h6C_OAA>-&?>7M7Xb>pa zvQ24{_{?92vSphZ(?P*LB2Pp!yF*7>B2Q;XT|r0WnRbfEn*gOF@>=Z_uqpBw4&4Kn z$i8AlS{JArYk)`Ir0T{4_EB{*=LK|BolXYz z8~+>7QFR6sN&(%fPMh>`z&@%@8ZV%u>Kes+(+>bTs?N+hnG6Ctjc<%*J^cYd=QRx| zy(FOHKNwKvJWBD1<3E^AmN_rrICm$@Dn0>ltQ(X!63~f3nBp?M-wWtOD@;W)!31pC z&}Q7z(#dbMwiR?d4O3j~v&~SR*|+vy1^bvy()cA%y3Ho0VqHPU7;5$Bx1n^5Ap=T> z2souN8Dwco;|~B%c7rlTfG@9WfMUr>kjnzR+6C&K58#oC3lwLv<0pXS2I$B-1q%(( z;o0#^DD!Sm0#bmlxW$^j=5>H>0jSFm@W@S#;te0q1Gv!*@|vB1ue8K3(nc2SU!inN z+tzU01bCh)ZpK|K&Z_{=cY(S#0bl7O{AD!P0QON6A}RroEa+Rjp1v6HWEZG!baCvd zHyjE$;+R&A!L4^@DCihrfXp{c76n^z3YDDo&Tc4MaSAiy0={CIdQEY0O{W5S-5~Q# zz?YrbZmee!DSrTT)&<2Ke*o}E(Dg3bKmP<6aD%d}1avf!>0a^fsOX5Jvs!&FV5>(24ANxfPDlUaAXtq$Q2v$u#I32iTI^PA>_#!QCOOwa&4C z*ET_B5oV==H@HDwvoj6ljc!mv5(2gy52yAHz^04R2(kda+^R^8qTbFNV4o(*zyZGE z*eW_hBT0?@H)6x4tS;LDB{Zp`-HbPu4TuN%Wr7x2hg&Bi-(X)xf~ZcyG`z-~7v zO&SE4X@c~U%ufL?>I>F8j|05e4Kl$LJiZC)yVpD8P@d2P)mrFPaJn00sdfgToZ$u~ zycJBn%k!20dy$sEb4`p@2_W@hN 
z98kms;46-WY^FEsDDZAT$3iwu;x!d?iy#%JW1#FKf<#0M*b*0x?I(6`Q(TC85=|ta zTMw$sAXCw;2QlyKX#rO>I;5d7lYm$B1?y=6FK>Xd-f=?$Hv=wjg7l}nk${&pKzQmV ziXU*98x-{<;6gVjLbV^@X)aLSNWoKGpz!SY9h9fIK(!JDoMph)Xd-QnoahEcjfx0r z`aAMQVylR5o#v(#J56-!rHzWjH<8Bw1@JOAD1O;rSVEKjXUmA<&IDsJXzlg1=%c1r zFQ6DO0`|Bol66!9oaq8Z%XF6H{{QX%k_Z2G3%)E~w|d+}02&^7@Pq#MHBRr#$^D>T z{B}7HAGlji_xJVb{lpJAX@83UNw(z(&?$V35ExYovBQLx0+!dG@J1+V+iDg6|#Y30420QZ6 zS$OvCQJ?$XLHJ+8_sHFYKKBm36W;wl8=+%V$9mu&p$CwIEw@MQ$a?22z@~#_L|(6g zt=I?RNhHznP_|+pY900}II1b0pcDhE3}qj`MRY2AeXlf?w`dErZ^sLOO*JnddPPCU z6El)sv@dG@6~_~6fOQ2OFVx6Z5u?9{((yuD0$0GM%_sU#HdvkmY+8o`icS@<<*%y$ z(3=heHvLs;r|47xTmGu{Qg0`*>Q`F+s)_q3*vA7G{kI3oJ|4K}KLH(UyQS5-03Ex$ z_3i?WGdz)>qv;IsNWhLJ=)94Dqnn^ZDdRrU z4T^7h1aO24)OeqO!wqN}FWUDrfWzFN7()UM?F;s%@pE1o;sUkt0=7bCj!!ZLuo*HF zMN21l3OL9VZ|x4f1~||S5|Zc;@HlrxVme@(zB0fCYFJai{w`2Exd7PK7p$iR?4#zz zBgz7{)I9B!*qnflHKTT|s zx=nD2Hg*blj%j?OqIxGr+bicbK`bQkhyY)0cAdx&2Cfr3^3`T13n*cZWYT_>X~XBg zHJP|)k9_+2RRoA2|EIk(jgz7}*LYWVS9hPPUf5+3#y%~If(j}kBC-ew1A>4IvWTn# z0uGBPqJRSlMn-Q0_avIQYgFz90=T2jbi1LsE)XzAjR+=DTytG;%k#YFouT>NFZcV+ ze0u(GZKuvTb)NTB^;8WXjdhB#rP#2s7rAR~i_Ia}*o%0nGBaaPdyy+4pQ1dEp!OoE zG`W+);32(;ssGOq{9nC@!Ny+1qhp`>JqR}TA|4&pE5Sqh1~2MLta(V^;Lyk5A$@~u z-hp6w-;iISGuWi8dU?Cb;t9mpxw4?#iDoeCf(I}E1qf!!gG_%JY)pR}&S*D+jrW@v z#AGJVVEJT*jcZ><@Ze-dM!BsL)Vp0$^YY7}y;xK4i!}tdS_Ti9_OOkvM6i6?Bdy7x z-g$8i$6A6xe64q04(^k|X!&WfoC^{3%7d(V26ZasYKd!(M<3RyR72HmmEdx>dBcIW z*w?YbsFTBk`@9@Mog6jf-HR58b}uOiqujYLqM z#43qlo?EuLw9zC4N0$d#^9+t^*xU^lTk>xqII=v5nwOwX^<5?N+^@!( zI@Lc|^9&9vJCOXKYz2Zk)o*CHt(L*c@&mEv0|bYZ2k8h54lds(*Bp!BS!F@V78o4V zusM4&`}~^-4lE1GBLoHqlm)q_kAdZ@Gs}WZ2qoCJA;^o1>1b1|*{3|nF@(X>UC^B- z$GX7?_9_d?e{V5(T6vK5*%|YHuVu7%B=>c0e{N51ckb0(9liqiNbauOExGIPCBP-Q zdAZ5C(YZmnp1Cf$id=KN74SfIKi=Q}Mz)lFEcmDx+Ov+yhY5!wFP?%7V+ zcG+y!!wh~O-sQhNvkmVFyf1TG=Gx3vnMIiyc(eb|Odq^2uw$lmCP@E~{xrQOy)*q% z`swtB^gZcY(%0bofz|10cz597bT52CFrRLbPKF1#TD*DSax>3NF=NaSd}FYi>1bM;F!f{V^VA2aU8(J< 
zXHtJiJ&^hh?h~v`El$l!O-Kz-^-GY_c-hJJ~f^z}FK@;`_v>iT4vb5-;GtBG%&z3O6RMPAtV65+)}`Ck7>YCb}dl z63r66|Be5V|BnBfU+-_l|4`iSU+=H-FZE~Rp34Zozu(>O_BXP?7i3*h0as@B`ByLOY%JXA8|mH!gYi( z3fB_ORj5q@Mk}l#j8eFUFjC)mBNZmFnF|Wz307eoA+K;A;RuDXgbIZ! z?yZBu7(#o6a|wqlj3%^G7)5BSFp|(l;T%G1g%O0q6owO8DV$AcsW6PtLSZPOxk4qO znZgi4Q-#5VCJJYi+cvU3sGF+L zhA>5;b!5G%n=IQHdzf$PE>PVHdXnmv&|*QJu`S}JPAmwzIkZ?1bhCI+=PbzBraXdk zAY+>loBwh*@*l9}#Zdrz z2;wM!_X*-CfWH#NQ2_4|I7-h@$}BCWxZ|{!9=@0lY;JM*-|6 zh@${@5yVjdeHEe297-?q|C7 zr0djP2PRcdqV}-4*MX+|xQ=n{Hz7fK+_jI_z8Y!Jqqsb|+TpiiAlix^FJO?;Y>_KJ z>b1L_Y0G9bFc-D`ED<%EG0+1dZJ!+%Y|>$^ZD*t-o21q}ug$wiIyPY~CLL{_Wnysy zoR2#&JFEQQ1{uzy5%YB9!C)Q6PlKv98*$d_Fe(@%pypS;S?8|!h`G6Wunt$RY`(S% z3(&d%V`ll`I@B4q_RRRm%YLw~rLJ~Pr5~=v_$`aNM_%-UwHR1s4_(p#T=N1sxZqk; z2qA@+?wGac1>AS)gLqJjtC$NCsY*Yn#pNg9rTwrL*9Uj)f5TnZq8srH{Jl7XTGRn| z;P1hL_HG-0*SJ_vt2N;7M2pn6)+KMCK5J9DNga-fs^MY$r%|#s%M>0(>#Wh%@vlQ^ z)?gr!)mEc4YjCAYX_m1xYjDlUqJ_ zB8wU+PK%lv*UKZ3M<<#ZHwr}}TPFrJC+bQY(Ihq96gChrOvvUBqe5yhOb9$gXrXW; zYQ6@kl`OfT(l;0~jpuo1pyRMdK=!WiQyd{sjH8;TI6OdgY;M@zSlk+hbC{EdL9#Xw zZE9i$2^VEH88sOvGmyC084VNBlo4r+pF#qKph?D4Q7Yp_{iFwFjOX+D|2y#~Be^eg z`!M_8p4*n&n7cQ3Ypyo8Jhvb>JvTO2iSPDx%@uO3awhvderNxFc1QMw>=XEQ-`&|; zvbEV2*+toz_96o@zqiwWOh1?2oPHqvYdp)ZNnetlnI4}$JKZ;ZQu?TL8+<$9 zr|^q#Z@4RbIea?Y5Z)8s60X6s{rTavuqqsaZwQe=m zZLln;#+L*}W8U8*I3{QpWXw;P_3tq|%=7sA-$R)5|I*Z$MP`N>Ybr70?`jIBl`*OB zQ=j4+fICvpryff^l)C-@FyG&hygPYQ@)yY~k{2f@Cr2d*Bzxe07uqMACjG>>iH{TS zCSFgJ5|3lPe^=t>#G1tN#KOddiSdcyiT;UG62~SwV8);D5BQ(>@B2Ibm;9&j3yZt` zoBeD2EB$JJsz1gb?Dz6d@U7p<58^+@zliUP?~1>K#{2i*ziZ(CVGV?(UNX>og@uvr ziLfLIX4ESz5Rw!VG#_M2I=mOEt4v9U_rk@{F3F4(<{49h-Qo6Aq%e1)DZ%YT&lwk# zB*~0M3KvBlG$r_fESTLqD8UYBI)z#AnUbWLfSClzGXWP8B+&%SAh=93QkYI~nP#Lg zjQ|Ugy-g*!OfyoLLU5U8q%fJ_GR;Wg0)oplBZWyUnq-M$v=OEKNjv zg|pzrrJTYb0!$NYd4++<3QMp}-mfeSfKIDE6B-^W@&4FAa#C#2k6Xi0W$V5KZ>NXTljaAADRVihJ#wewQ&Q%J9Mk@tEqm+!$NTrm}IZ8>X?-5D~q2Wrt(Ai3HpQ|W+Ef2D6hL9rhR&l|pB z>-AOmn$Sl9&p(4AvUXVw{}m7vk+%!rzwCqJ=?Zx5QBV{!!rt&M$Dk-)1o)gFRs{Hr 
z;GD>_pAwuEdG-^6^CHjgCpa_m?8gM>MxOnMAa;bk{e$4_$g_Xv#V3A*C4VD`9|1lj zh#vv=5yX!GdkM$sDSSZaqOgY`euUNDCx{;b@Rt0bD1HQZk05>oc$Xl41bBzw{K&I^ zAviTxszxANO1P#*;fh9pFH~t!5Nfiw-cO0dG=)v zXU?KL`x3!>3i(*rFlJ_LEm=yFA(Be_hk3)+^K|cm94h8)vwD=SB zR^QZ%J3()O7H@*y3@z3K{Re1qCg@GjVocB*p~aV=H$aOmLH`%DxDxb#LW?OuulIv` z^(6ZUrsMVMN%moadXjyJpq^xZkNXq#>PhxNf_jpDfS{ga?prgmiSAgCSL+X-q%_BRBzBl~ND+L8Sg gLG8%iMo>Glw-VHj>@5VbBNXRmg18ajrg5=<0p@2thyVZp literal 0 HcmV?d00001 diff --git a/src/tests/databases/mysql2sqlite.py b/src/tests/databases/mysql2sqlite.py new file mode 100644 index 00000000..2e96e499 --- /dev/null +++ b/src/tests/databases/mysql2sqlite.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +""" +Convert MySQL database to SQLite using SQLAlchemy. +Uses reflection to automatically handle schema and data copying. +""" + +import argparse +from pathlib import Path + +from sqlalchemy import create_engine, MetaData, inspect, Integer, String, Text, Float, Boolean, text +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import NullPool + + +def convert_mysql_types_to_sqlite(metadata): + """ + Convert MySQL-specific types to SQLite-compatible types. + Modifies the metadata in place. 
+ """ + type_mapping = { + "TINYINT": Integer, + "SMALLINT": Integer, + "MEDIUMINT": Integer, + "INT": Integer, + "BIGINT": Integer, + "DECIMAL": Float, + "FLOAT": Float, + "DOUBLE": Float, + "VARCHAR": String, + "CHAR": String, + "TEXT": Text, + "MEDIUMTEXT": Text, + "LONGTEXT": Text, + "ENUM": String, + "SET": String, + } + + for table_name, table in metadata.tables.items(): + for column in table.columns: + type_name = type(column.type).__name__.upper() + + # Handle TINYINT(1) as Boolean + if type_name == "TINYINT": + # Check if it's TINYINT(1) which is typically used for boolean + if hasattr(column.type, "display_width") and column.type.display_width == 1: + column.type = Boolean() + else: + column.type = Integer() + elif type_name in type_mapping: + # Get length/precision if available + if hasattr(column.type, "length") and column.type.length: + column.type = type_mapping[type_name](length=column.type.length) + else: + column.type = type_mapping[type_name]() + + return metadata + + +def remove_indexes(metadata): + """ + Remove all indexes from metadata (except primary key constraints). + Useful for test databases where indexes aren't needed. + """ + for table_name, table in metadata.tables.items(): + # Create a list of indexes to remove (can't modify during iteration) + indexes_to_remove = [idx for idx in table.indexes] + + # Remove each index + for idx in indexes_to_remove: + table.indexes.remove(idx) + + return metadata + + +def convert_database(mysql_url, sqlite_path, batch_size=1000, keep_indexes=False): + """ + Convert a MySQL database to SQLite using SQLAlchemy reflection. 
+ + Args: + mysql_url: MySQL connection URL (e.g., mysql://user:pass@host:port/dbname) + sqlite_path: Path to output SQLite database file + batch_size: Number of rows to copy per batch + keep_indexes: Whether to keep indexes (default: False, since not needed for tests) + """ + print(f"\nConverting database to: {sqlite_path}") + + # Create engines + mysql_engine = create_engine(mysql_url, poolclass=NullPool) + + # Remove existing SQLite file if it exists + sqlite_file = Path(sqlite_path) + if sqlite_file.exists(): + sqlite_file.unlink() + print(f"✓ Removed existing SQLite file") + + sqlite_engine = create_engine(f"sqlite:///{sqlite_path}") + + # Reflect MySQL schema + print("Reflecting MySQL schema...") + mysql_metadata = MetaData() + mysql_metadata.reflect(bind=mysql_engine) + + print(f"✓ Found {len(mysql_metadata.tables)} tables") + + # Convert MySQL types to SQLite-compatible types + print("Converting MySQL types to SQLite types...") + convert_mysql_types_to_sqlite(mysql_metadata) + print("✓ Types converted") + + # Remove indexes unless user wants to keep them + if not keep_indexes: + print("Removing indexes (not needed for unit tests)...") + remove_indexes(mysql_metadata) + print("✓ Indexes removed") + + # Create SQLite schema + print("Creating SQLite schema...") + mysql_metadata.create_all(sqlite_engine) + print("✓ Schema created") + + # Get inspector to check for foreign keys + inspector = inspect(mysql_engine) + + # Disable foreign key checks in SQLite during data load + with sqlite_engine.begin() as conn: + conn.execute(text("PRAGMA foreign_keys = OFF")) + + # Copy data table by table + print("\nCopying data...") + + # Create sessions + MySQLSession = sessionmaker(bind=mysql_engine) + SQLiteSession = sessionmaker(bind=sqlite_engine) + + mysql_session = MySQLSession() + sqlite_session = SQLiteSession() + + try: + for table_name in mysql_metadata.tables: + table = mysql_metadata.tables[table_name] + print(f" Copying {table_name}...", end=" ", flush=True) + + # 
Count rows in MySQL + count = mysql_session.execute(table.select()).rowcount + if count == -1: # Some drivers don't support rowcount on select + # Get actual count + result = mysql_session.execute(table.select()) + rows = result.fetchall() + count = len(rows) + + # Insert in batches + total_inserted = 0 + for i in range(0, count, batch_size): + batch = rows[i: i + batch_size] + if batch: + sqlite_session.execute(table.insert(), [dict(row._mapping) for row in batch]) + total_inserted += len(batch) + + sqlite_session.commit() + print(f"✓ {total_inserted} rows") + else: + # Stream and batch insert + result = mysql_session.execute(table.select()) + total_inserted = 0 + + while True: + batch = result.fetchmany(batch_size) + if not batch: + break + + sqlite_session.execute(table.insert(), [dict(row._mapping) for row in batch]) + total_inserted += len(batch) + + sqlite_session.commit() + print(f"✓ {total_inserted} rows") + + # Re-enable foreign keys + with sqlite_engine.begin() as conn: + conn.execute(text("PRAGMA foreign_keys = ON")) + + print(f"\n{'=' * 60}") + print(f"✓ Successfully converted to {sqlite_path}") + return True + + except Exception as e: + print(f"\n✗ Error during conversion: {e}") + import traceback + + traceback.print_exc() + sqlite_session.rollback() + return False + finally: + mysql_session.close() + sqlite_session.close() + mysql_engine.dispose() + sqlite_engine.dispose() + + +def main(): + parser = argparse.ArgumentParser( + description="Convert MySQL database to SQLite using SQLAlchemy", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s mysql://user:pass@host:port/test_core_1 core_1.db + %(prog)s mysql://user:pass@host:port/test_compara_db compara_db.db + %(prog)s mysql://user:pass@host/test_core_1 ./sqlite_dbs/core_1.db --batch-size 5000 + +The script uses SQLAlchemy to reflect the MySQL schema and copy all data to SQLite. +This preserves table structures, indexes, and relationships automatically. 
+ """, + ) + parser.add_argument("mysql_url", help="MySQL connection URL (mysql://user:password@host:port/database)") + parser.add_argument("sqlite_path", help="Output SQLite database file path") + parser.add_argument( + "-b", "--batch-size", type=int, default=1000, help="Number of rows to copy per batch (default: 1000)" + ) + parser.add_argument( + "-k", + "--keep-indexes", + action="store_true", + help="Keep indexes in SQLite (default: False, indexes removed for faster tests)", + ) + + args = parser.parse_args() + + # Check if SQLAlchemy is installed + try: + import sqlalchemy + except ImportError: + print("✗ Error: SQLAlchemy is not installed") + print("Install it with: pip install sqlalchemy") + exit(1) + + # Create output directory if needed + output_path = Path(args.sqlite_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + success = convert_database(args.mysql_url, args.sqlite_path, args.batch_size, args.keep_indexes) + exit(0 if success else 1) + + +if __name__ == "__main__": + main() diff --git a/src/tests/databases/ncbi_taxonomy.db b/src/tests/databases/ncbi_taxonomy.db new file mode 100644 index 0000000000000000000000000000000000000000..b829fb4def48647a2b08e4a67355fa3d7e980f05 GIT binary patch literal 32768 zcmeHw3v^p&m98xvTe7W3-87LcS5RPO&!~@9ouml=Rumb?y+=ipF|H= z4?7NR({t2zfVMCN+Tqdi>I@f#UZA~YE}ba_3SmuOfngd3+L@sRS}3z%%J8~SF5K@w zFP(EF2iCfaweDKiUTqp{@Bgv)zyJMzd++~O=dPLNaxA@=&Lo)}9j&;mqO!8$YMQR7 zsHn!jR{X2{nD7@$`33*2)ck#;zgAat9zS3Q)ntFU!g&IJZ2oL&U{eE|8ramprUo`O zu&IGf4Qy&)Qv;hC*wnzEMFYaB$>M5nue>9eV-^x@Dz?DQ=a@6hd@3C`{AM0GJT`n} zj6O2FZ+eW@bJA_KHE5Pj&2w>j^5BuN@v+17!Pz79!K2gDyXe}Q6(+-`avM=IOzJd! 
zZ20iV#PH#^Xr!GU9os*AbovM#4C;DWVp9uXonPkSnBDwhf?1;X&CX7b4IeBv>DISN zu!|Ts6=%<=6H|LPux7ZWoz{t?_8Ha>f9UY!%<$n8^wii1x=m40du==4QYk84 zFdG%mB(;jiZ>EhD585A9e2OqBs_pIWZr3T4Q0HR_CY#mp6j^H9t6+7M^L1i-R_B|} zUpoI&`sL4JjGLWqYG6|Xn;O{Ez@`Q^HL$6HO$}^nU{eE|8ramprUo`O@MqV6rQpCA z)j}u!tl~%TC$&Us^TRa*70%b3&p97;zR!89Gv=Ihb~>+c{MPZZ<7b zj#h`w{$KWQ+n==mwf!CT75g#!mG&KWtL>+@7i@oT`-rVz<7~6GUYo~OS@*-b=j;AQ z-TUfpsk^amq^_;bUiJw zn?IWx*wnzL1~xUYsew%mY--^DVGV5k#J6`@D{4Gev$-a_!p1l@TY2Y0m3RJi!|Q*T zvsRcrH6TMSD+m((&|*&AKZs3Ks!{9_NIIGciTR8@0HdHQzPkf z%V;*2VYpQDE3?h79BTOR``eXPm1B3vJzAfcu|9LqbN7RU)-2BCJ&|mFL7u34`3vsl zC*61c^cHj~lIuUJp~mlV$M0{r^IKPg>LraGx2-Z6j?FPy--~O$7tgqa&)0z_idg@s zD#dTzhblp;EkpW+d-gYO;jzn=@q(v{<4xW7ld1cD>JmO4Q5)W-PW12G%U^T}ALJC$ zwx_vlI>F@F>HV)v@Bf~Q{}Gv`I1|Sfxz+ZQ&ul;Wb=Qx(6-sZHOqm*a+tkS0U0ETJe>F7qS64M$SW%Q|Zx1P?`>j*=*EC#67D>BQ(v?&9?{Qy9pg&QMdbwVoy6?yC z3#ZEye>ipDj~XtVDt4-lci(GM_gfk+KxWB`G*h|n2UGXG>blUO&R?C$#~z>h*x$P@ zm{dxIGRHe=r|xjNoF_rR^N_luH_ZipogDT}S3T4$jwd!#_ zH?LA|P$+8;Os)Nc>$y>tvPYpT{qL!z-@BeYs!#&oR4Aw3oI3S8_p?q=MEwQs*4Rbj zFQyW2b3JQQo86@}OWZk?_>Ak>YL#+_LJ?k`5?*yZbE`_ZS*3h)O87U|GsNUYa|skF z-HN*Phw-1d&C+wOHettzPHs|w}FUrrsl*Y&k=rHy~TLJ8eJ75b>_%Ww|T zIP|Av%7LL@9T@tx>x-(v`ks|3laY$ah{g3qx6;P; zPdwv#a<@tuR4D8xCfHB9p4_5RE>|dH*G-IF@A~|ZLb-KbrVRY<3j@D<()C$&SsSjG zDQyS8(suBx4gY;ovBqjB~l;&3=&ENl&>mf!NsQ$Y$#qD>y z{Q=iQCsoP|3WaNNb5Zw0$3c;N(o02KaB~4S*XnvmHKBU7+4381&gpvSpxWW{N{2h{ zckg(=>mkvt5I0?~7_ocy=k7yR*F()}n~x}M;_q>pQJk>$KHvdBgIQU z+F!MedV_kNdX)Mgm87PrZmQAzL-Uu-?=@%4d(E`z4buhFCrs}!#ZCRDhRUB*p0B*G za zhBh*{TrVp2;zEXHz_fv4o35iwAy55cJkiM2O zMKyh7nOF=nV{-YVqS_ObX-d8#pa_&8;{ zM$?@_ruVVgEL4F@NprE~d?Kfr<1xzQiPm4pN^+JlvCvV5Q`Y@~?_w zjzDdpqGA-Pe&xI8U{RWRDqU(sCMG)AqYA$DFlE~6ufLGw5^0T@yRV^4K7728NvD&s zbS9;t`3_O0Fy=j+if7U*=_TwKSx9-VyJjiVIA&4GAnATzCaqD_)`Q{<9?ftuRx?WD z3}y2A>mSXqaoRv?o`%te>K~oWte<7*EQ3A6Wc7+@nG)?t$5Lwjod>`bt$%blOKdxa z^+QiNdB8F&)-owB!bd04nK-MlUH#a>3Cgs`S8wDh9o{ugnHv4|A6;NF33gp0d-r~r zoAwRu*bFg8#WvIrxKu2WkF(j+(YG?7^K{F6*0i_tYrk2w 
zZl<0g%5)MvO{>yiZxp+uMZt={D_eUvWg6PJm9nV@C7Cwrdxo4p>P*@O;2}fKwi!0g z#h9fuoy+7?F*r!v5!732cRz6%WjX0_hRx(s;+Zknt@a)0!}g0h+hDdCHbE!J5arCH zmzamsl1;BKuuMv89&``!jVh^3V|SD?1wBs7p=Bn^rW5%ron+3)ng~QF)1c2lD_vGP zjN>Tew2Z^HxD+dPpcg&VP0n&1hkQ7&nGBJ7FkC##16@RF$8Bp|B2LGcg#~7bEi>`f zPO=dlx9!W|GA2|{rxR?3iLpktw|9_@Scb1ukk(z$MbvR_kz+A*GM~-i5H2(Hju7}l zj&thJ8D_;eY+xtRoq?xxNOwDN|Bjo{1Dj>pxOVZpZSdDoiB@J7-XK8~j+@h5g0w_h z1Zk5ZuJX51ruLBIW_6M&W+k6B*ko$}JHlsVDxGFa3qc%?)!Z_ZVCh%}hjE!1HTl8i zm$^DbF3p7K9q{R(Ce!&W+rcs!sUHnTQ!_yOsyv8w!rY0;a7!Hmc8KDQeyY*DBhcY#b)RBF3Z%=pkfUfa&H`GYT`woxL;qFMJ zJF2658nNF&pPm`t9qAs7=!p#~kuD{=B;tXQ89Ln8GpK9Xv`w1xYla9B@m-U>hG zHATj8d}O{ix_@|BVeH%j#*nVBuDNAyiA^LJ zga!$Yy(tfTK#m3xW(O_@X4LbGYzAYVs;@HfW}mKKk&kYh;g;Z#v514c?eqwf&5@x} z9L=QS^a#u>i4&1{p+R+tw_gVL;z3^ofdjC`VsMRQZn8BtIN@3Spm&O&YOUBI>2Y*! zhK^!w4U1=&mSAM^5SG4Ezw%z2WEdkON5|>NAX@8I$X$mqvG5YPM^CGTNkaJxOpKTy z5X@Lq(YL1tt^l&^Q)wDOMy-@-4NB7!4Jw;vmy=8?CF<)S02%o^l-Z}M%Wr|ZD;WZ+ zNhb3tR@PZhHDx+pI+V;rtEPpn2YS`6Jyr0wI?2M_$k$46%uSKwOPoE9W&IMhnPFzq za77xeF$Hx9uSwGT@R1ShxNv`euTHl+D{*{b2E(!COgfpCR4iJ**dW>)jSMIgBJuxq z73V9Q-*$f5d9QQPd8PAm$7_y<94W^&j<`+L_71zr_M+`6)cJYab+$dW zrn>*C`{%mPA@-lI+f(O4b^nvKi-`HZTl09$yK8Q!30Z$;{i^jn*6XaDmfu_c&GLxl zt(Iv^u=+o%|FQbR)wfp9R)?#vsQOveSE@czb*}2hs-Y?`^&0gA^?quF>Y?h)FPk4T z-)mkpUu|wR{lWB<>0_ov(^V#S<@YK-S9xdUa^*y2L&YyKaq`o_1CsVe>4$TOJ8-rp zXgqmX!x^)fNiUUg{4@`Q8uic*&vA)7((?>ZkkpXN%4^@j1F25>%CgYo8D^ch*uG-8 zpNpWXJ=SoSD5mQ0fWa2Fy4$bYrxugEaF1t-Y9WnL!k(0z4pU zAnNJ+{Ou`mV)2%cDZZRM5P%>$^$&^L+tI3}xh-L_%hMIl>wVAfEmq_M$OyLe#MM!)X^z_iS1 zM&mvGJRoZrlmoz2@EoidlhTV^c3H#K#sjv7q8cu#qcW8`4v}c>vpR$hUhqn_I^&KBJkb9yj&yfVamn<5Et#%{bSh zb$EEd-DbaU%UO0I!x)F(!2|I6qB51tt&CEec)(u2+C?0vxHg@#od@g;D8ChS+a4-saPLgTt;(>a{F&90R*3U-uG23}y-!60p)E`@3 z7lqgL*UAI&!X`@@;M=uxZsCD(hdfO~bLo|3@(^i zU;*qztwpt)t@7Y`&n6lxk8MNS5DfIHOBL2r~%9>Dl0 z=CT1ht(}UhvUVPbdBlrW9NC@{svSKzJl7E!9_)xjdir|$l*4^H4*=ceX_!B}%q+x_ z`p(iUTr6U4^^^mMp7fPrpl6sof<#yk!U1*g5D!>A7;2awVc1kUQ_`zmz#bk5`#5?p z&kbE`#whB#lLy|O^=y0k5ONa9bew?0wrT8tEI7xs(~C?Z#$mIlN!Xn{0C#^#*FeLh 
zpHx3z50RtWo=y_aijxREFk;}u=}WI%jlexh)yThV>tBk$oD`fG3x^(9e3ha4+Q z2SsIQMhhB(stz7e!JbQWVepDWJOTx_L>|qMaQxXM$@sJD@`f)WEI%&>2qhCn&LV~Q zT-L6SM}!c7MGym^Pjr+TKr@eMpcnd|LFNz2Ng`lWg_e8i;t>?gV5a4GNR28f#`BS4 z%E`kcRPbWlk$rOn|Leo`CLZBI2xFo+6X)U#ok=f{+>kt6kVkmX9^EpEa?Bc=31;ba zB%#Wcy0-HbQ9Bo5g!D#alK0}*|U=f9O$X#FeKvhahzt5hmfigF9<$m~HhZ0%#rD)9d**#^OMQy#JXN_N*lG8 zm#TS{7>{OZ44Yf#NTj0rzHnYFKoz^xu+RsX)A@z7#CisMiq_MVBZ#E_r3|;q=1Pin z+p}cN>t9+)BLhKddZb*ZS@Z1~i9VKELULibgzmYCK&JYaVrisSjg6x#@Vy~T9adQr zrG6nQ#IQiaY07kyws>2%5y5e&=C;!l`K*|k9vCcUM|P&*xjp5^9Tf|~ce-|8RWW@d?K{$8kr*QE7k0{*?Vb zJ8KWy>uj&tK5cuO?Wiqky9{^y&)0pb?p)o;x~{sK+V9nVw)S1M3$<6(dTV}GbD`#w zHFwwCRCBauca7Wny7g)6yRC8Sn6<;Y4cUNiS^n1Y7nUWvseZNk+3E+XSF0zh z8>{|T)px4CTy=j{uIgmfRaF7%57Y~|0dO~!p$<`lRFnBP=I@xlV*ZHvX7h3LPII;C zRnvLX!zSJ|Z`yBat$efcJC%=Bez5Xv<#m;VmD?be{-;$y;L?v=r5dcQEt8WU`P&6f zYviU^w}#Umg-Y4*3iAeIKQ%PhO1-ADpr5?8eK z1(8q&Z)_6~efT`BFU;bMq;dW$$IcD`fk(eez}37IIrXzNxuvHt`2++W%^|t9MA19G zHUY86UZ0j@qbSlVAPVVFMr6dx5Zb}E2?#-4kTk(07qm42pMYqi9RyWNMm4kv2sVbj zt>+ISNygIg;P7IGyD5*qv2-Fhn{8F09X-Dx2h&r}LB|;fZZWj<{ z1kq5P1?fSv7R>^}j6kUM{0OrGNWIKu2r|;EYo~w+W5lQD*+^kK1jHFwBDq5bpnHjg z(sIwz9&Hv7R74><5g7qC9baQIi?k@6A~P)@rU;kiQCPMJh$HI#t&gYAXhiY~h!X<% zRFyfFUenNN0kJ_yM`v=mg!Tk=3nax(SQsv|j^JF|{t5x1z!nhoANvGg{>CUi7JDnR z7%l>?n#Yz00603L=7F4SBY_;2G% zwFv;~Fl-ISM(gAk0MG|L5-Z}_BnkdFER@`_v{2gwKyvIvu|qO#Ws|uDfN*GNUfkge zX(pr5@HPS18|FHn#l0A%OOd8VZZLLiJUWNo3j*TSPDgYO6#V9M> zl6Frt3BbQwqT=UeR$;pUfD7+4iby$Iu3p(80Nc)ZT1Urj%43=V5p za9J7rl~mOg?Ra^>+zyYzrOZR6(>!plKcvv5rh09(^8mWTn4&b0$g+_t0zANPAR4%+ z3`8bV#tOFb0KZ%Pfr}t(jpqs;NOz0$xkgJ{c>q|jzbl^B+Zqp`x+N-pUS^aK50E+- z3cL#e3G|Z4qp*um8~G^?hlp|a_VB=`$9)@dZzQsp2XKXk<6fv&nPJW4;n|hv&M_eQstH7;uaK&O-W+{=5 zujf=))5WtSST4h`QI{=z90y$}aO)wQu&DE>9qPB2pO0Z(qJdjcDo!Gmf0ixh72Ej* z7)3O2%gXvv5=Y{tc$;}fRL(7AVD!l0`jAmM?fi}6YUp@KB?L&|EQ#A9#URGT&lCR^ zxMcyU`2}qj*~i~N{99lmv&gQjUde{R5f=tDiqXP6T5U zgYowAZz1X1z{CiuXo!Pz%Pe%N_HXQ7_1`=A;{=Xv&`v(V{QNO8g_iR(Kx#q=7ACQ`-O%U?s>n17ty 
zn8M&t*OKwu#E_1QdBqb+H+sIELo#gkAD+G;I()IN#}HoO_)uj^8*gI{waakK;y1oBj9p7wnJP@31e~N9|3vU)ui3_IcY~wk6x7 zt;^=b9e}Ua-Bou@U9|QWwdZR;RC``nndWV&> z9QsHMy!C7riP-O@`WOVuuWtFA%~W!@Vy_D(zuJg$_`(i>)^V5Q7Q zH|$-$<(%oz4HGI@D03H7SDQf57&H>4OBb)psLxxdrp$gnxaD~mE@*oTRT?@xp@AMM zP?WhpRPlk!XGFJ7F5GJVVsOQQf|)W8d5qj}4CFd%v0mL*Fj3~QDELUCo9qQOt+Pxn z_u;(6pJl}H6)GXTr{X=4ViK9j<3^c!_c&6hpv>3!%5l(bBk61w2}zvhs5LDYqY0mI z&e$yy1jrY!DB;2$;a1ALr0Z7UK$&VE%jA(YM9JlFnoV{}S0{9D44Q>oD07rd{18JN z)jBCRA*9t#JuKWznddg(Q4HG9CL+957Pm~ls=&jY)9F>rQGQnw6wX525GFG$OIe(X z-a+W#4h2aMP-t6wWo-PXyFWH4igbn{_IV8+&)fcm1;*=Sd*nJWl@AIZL=!2 zx#x9F1+rLBq}${-sC0C+U)_#EUfhlwwE?Tadi?_8oaSimYLXqm-bQJuj0uJWgf)|% z+||d#@+~qZ+@e_ZC9ioqY2|kX!Uza*+CxTe!^o`yBAX$fk*l=cu~R^Z(;w>Xo&n;F zvk8>6v+5ac1V#}7LC@Z3Z?_nrWXUe<$T7GV6sPeRB#TE1WyI?e5Dm@vdb>wCv40ea z6bQhekhp~RG~${U@3PVYf~2UYYztWnjRHa?zc+aMSuRJ=6C(N|KoCfT1lnPg0hjV@$0)Og_ZNgm!56IpBBDKDISfDx&qUThzEvxAs4(sj5E=Cr84rzw zBRGOdQDkyXVQLi+8g+%#-cV9s#TF?I6SfKni?;fMw-Zq`r}q^CqMC76UMYJlTdyjyKr`;30o%mYK&~m&iAkvxg8L4>KlmXy^-y}8O{=VX7Y!VRe zv_s+qvp|w4X`~GCA4S$i0g(<-{J#7m_7Zs(uhQHCA{!sXz^kG)l55xIWxQRItAOBU$QMkK zsjo12x5m*&mhn(VyoC`i-iY%Fh-R8YT1JD2tpY+B6graQh(_YrM5ywLgNtg@Of4uN zni(v^*kR;s5)i|LJi+8hnoZyy1DRrmfkTkA&?_Kh8H)xJ$}0VdN32I6(TgXT5QWa5 zddZ$Crp;Oe1TRA}RnfEZ>dBz2;)DUPJX0In1e!7TYUWLM&E>0m`5PU@#c zqwolbZImf%29XreFCe0sj0R6mqIknGZEqfbDHbJ7<)O;n*ufg7YbTr9*# z$4MwCb|JdN;)Fs1f}X+9Mhy+N;T8}E1w9hC+Pl1rUI776$QwL`8-OYDW&(AsB%rdg zi(LZ3ARp){JTB8wJOaX=ASh(^#4Uy+gLB7}cFh7}oIpsTDzX_Swo^c8Gvd?pY!roV z6A<3G{J~R3J8c$5NKqwtYI-V)sHm(IEy6x=L!P?yf&_$Nl0^-2hu8C`*Yg{0Tw1uA z9D*n{`^+Qk6>Wwi+9CINfKZA6b*FHZD7k@3Poer4XRz9Xk>Xt%pRh;V z{UURW$&fq#dIuL2t|U)Sf*KBbUm8AJ^lyXR#ltix43Vfd*fBz;4%CdZfb2IeW zE9@qjmvWrAqMk^x9b+Uij}u=t&{w={uu~W$!L65L!DmqIx&k2HHid`d=En31; z=Adj4ddTC6ph>C|YkTntQ6vna3aL!vokE0QsGtb}Wh{qxn@iAfEKjb9km8Ac;9%LRo7qpo7%6| z-d=mCHc;~hZv4L&vHqT#I_rz3y%pRv5#l1IEhWbs-m zs=r_Twdzk)-&LKez6LM;w^sccHUEF8dJwMwn5vPgmZ~be19*yhAC;yisO{!o;T^zZ zmkRDTX`34O|4sw@{_i9X6iy-!6Rq8~>j194u$i@V>~y&s&B4O8By>VH?r9uP+kr?kS-H 
z!m8BLwm`0g+S51KJ2+6%7C^NTUuX~=7>tCCv-T8#LmP1sBGQlMr9A_OqV2^9Lh}C7 zw!oT=K+KqNf26-;TmZ^OJgE;4_Vx}$N~i#kXxkC)!9y}*&+Y;cqzm6TFxcHUXl&{( z06w|^0|$D$drMmaLAr2bv%kNuyL3wXDYJ_tk@~y)28@bw7XTeyco5YWj+98_E)b{~ zYCYfE-`(G55XM~qTy)`DaZh-l-?;Fe!ajM>p6<1_z@?6*LwA z(YE5=Z%=Q3sRnu}^VU%1|GcHAH(aXN9zuj6^z@XNW@7S zH0#@*&_4(%z6qe`A_D^>20M!sT2R0US)Ti(`sNsyBgms@l6=IY_VI9EZ?7Rn+FS5b z=J|~qP2stMm;o>LU>NqQ!Vb!O{Uv%R>A*0Bo ze=+~Equ_;S@xx!`*3unFVIt*S^3^>%3r$!<511zOUlJtUE7yP9Ra?PBHrz(cNWK&w z02JJ0!(Ebh&4%kOG*af{<=V(k8I|JGt9TEumvjhlx-&qs8->Aou!nfqt4Heh_1ENlF5C8xG literal 0 HcmV?d00001 diff --git a/src/tests/databases/ncbi_taxonomy/ncbi_taxa_name.txt b/src/tests/databases/ncbi_taxonomy/ncbi_taxa_name.txt index 5555cf0c..a8f5dd28 100644 --- a/src/tests/databases/ncbi_taxonomy/ncbi_taxa_name.txt +++ b/src/tests/databases/ncbi_taxonomy/ncbi_taxa_name.txt @@ -1,11 +1,11 @@ 562 Achromobacter sp. ATCC 35328 includes 562 ATCC 11775 type material -562 "Bacillus coli" Migula 1895 authority +562 Bacillus coli Migula 1895 authority 562 Bacillus coli synonym 562 bacterium 10a includes -562 "Bacterium coli commune" Escherich 1885 authority +562 Bacterium coli commune Escherich 1885 authority 562 Bacterium coli commune synonym -562 "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 authority +562 Bacterium coli (Migula 1895) Lehmann and Neumann 1896 authority 562 Bacterium coli synonym 562 bacterium E3 includes 562 CCUG 24 type material @@ -467,3 +467,6 @@ 562 1637691 merged_taxon_id 562 1806490 merged_taxon_id 7227 2267365 merged_taxon_id +666668 carol_jabberwocky scientific name +6666666 Jabberwocky scientific name +666668 carol_jabberwocky scientific name diff --git a/src/tests/databases/ncbi_taxonomy/table.sql b/src/tests/databases/ncbi_taxonomy/table.sql index 78777a3c..016cdd55 100644 --- a/src/tests/databases/ncbi_taxonomy/table.sql +++ b/src/tests/databases/ncbi_taxonomy/table.sql @@ -20,4 +20,5 @@ CREATE TABLE `ncbi_taxa_node` ( KEY `rank` (`rank`), KEY `left_index` (`left_index`), KEY 
`right_index` (`right_index`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; \ No newline at end of file +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + diff --git a/src/tests/test_dataset_factory.py b/src/tests/test_dataset_factory.py index 82cdf350..2673b6b6 100644 --- a/src/tests/test_dataset_factory.py +++ b/src/tests/test_dataset_factory.py @@ -105,7 +105,10 @@ def test_genebuild_workflow(self, test_dbs, dataset_factory): production_name="new_grch37", assembly_id=40, created=func.now(), - organism_id=9) + organism_id=9, + annotation_source="test", + genebuild_date="2026-04" + ) session.add(genome) genebuild = Dataset( dataset_type_id=2, @@ -185,9 +188,6 @@ def test_genebuild_workflow(self, test_dbs, dataset_factory): succeed_status_check = session.query(Dataset.status).filter(Dataset.dataset_uuid == xref_uuid).one() assert succeed_status == succeed_status_check[0] - failed_status_check = session.query(Dataset.status).filter(Dataset.dataset_uuid == protfeat_uuid).one() - assert failed_status == DatasetStatus.SUBMITTED # "Submitted" - assert failed_status_check[0] == DatasetStatus.SUBMITTED # "Submitted" # succeed on xref temp, succeed_status = dataset_factory.update_dataset_status(xref_uuid, DatasetStatus.PROCESSING, session=session) diff --git a/src/tests/test_genome_factory.py b/src/tests/test_genome_factory.py index c8e99a56..d5742b60 100644 --- a/src/tests/test_genome_factory.py +++ b/src/tests/test_genome_factory.py @@ -95,7 +95,7 @@ def test_fetch_genomes_by_genome_uuid(self, test_dbs, genome_factory, genome_fil metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: - genome = session.query(Genome).filter(Genome.genome_uuid == genome_filters['genome_uuid']).one() + genome = session.query(Genome).filter(Genome.genome_uuid == genome_filters['genome_uuid'][0]).one() assert genome_factory_result['genome_uuid'] == genome_filters['genome_uuid'][0] assert genome.genome_uuid == genome_filters['genome_uuid'][0] 
assert genome.genome_uuid == genome_factory_result['genome_uuid'] @@ -112,7 +112,7 @@ def test_fetch_genomes_by_dataset_uuid(self, test_dbs, genome_factory, genome_fi assert genome_factory_result is not None metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: - dataset = session.query(Dataset).filter(Dataset.dataset_uuid == genome_filters['dataset_uuid']).one() + dataset = session.query(Dataset).filter(Dataset.dataset_uuid == genome_filters['dataset_uuid'][0]).one() assert genome_factory_result['dataset_uuid'] == genome_filters['dataset_uuid'][0] assert dataset.dataset_uuid == genome_filters['dataset_uuid'][0] @@ -124,7 +124,7 @@ def test_fetch_genomes_by_default_status_submitted(self, test_dbs, genome_factor metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: dataset: Dataset = session.query(Dataset).filter( - Dataset.dataset_uuid == genome_filters['dataset_uuid']).one() + Dataset.dataset_uuid == genome_filters['dataset_uuid'][0]).one() assert genome_factory_result['dataset_uuid'] == genome_filters['dataset_uuid'][0] assert dataset.dataset_uuid == genome_filters['dataset_uuid'][0] assert dataset.status.value == genome_factory_result['dataset_status'] @@ -143,15 +143,15 @@ def test_update_dataset_status_submitted_processing_processed_released(self, tes # fetch genomes by status submitted and update to processing genome_factory_result = [genome for genome in genome_factory.get_genomes(**genome_filters)][0] - logger.debug(f"Factory Results 1 {genome_factory_result}") + # logger.debug(f"Factory Results 1 {genome_factory_result}") metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: # check genebuild one has been updated to Processing as well dataset: Dataset = session.query(Dataset).filter(Dataset.dataset_uuid == genebuild_uuid).one() - logger.debug(f"Dataset 1 {dataset}") + # 
logger.debug(f"Dataset 1 {dataset}") assert genome_factory_result['updated_dataset_status'] == dataset.status.value dataset: Dataset = session.query(Dataset).filter(Dataset.dataset_uuid == leaf_uuid).one() - logger.debug(f"Dataset 1 {dataset}") + # logger.debug(f"Dataset 1 {dataset}") assert genome_factory_result['updated_dataset_status'] == dataset.status.value # update dataset status to processed @@ -160,13 +160,13 @@ def test_update_dataset_status_submitted_processing_processed_released(self, tes # fetch genomes by status processing and update to processed genome_factory_result = [genome for genome in genome_factory.get_genomes(**genome_filters)][0] - logger.debug(f"Factory Results 2 {genome_factory_result}") + # logger.debug(f"Factory Results 2 {genome_factory_result}") with metadata_db.session_scope() as session: dataset = session.query(Dataset).filter(Dataset.dataset_uuid == genebuild_uuid).one() - logger.debug(f"Dataset 2 {dataset}") + # logger.debug(f"Dataset 2 {dataset}") assert 'Processing' == dataset.status.value dataset = session.query(Dataset).filter(Dataset.dataset_uuid == leaf_uuid).one() - logger.debug(f"Dataset 2b {dataset}") + # logger.debug(f"Dataset 2b {dataset}") assert genome_factory_result['updated_dataset_status'] == dataset.status.value # update dataset status to processed @@ -176,11 +176,11 @@ def test_update_dataset_status_submitted_processing_processed_released(self, tes # fetch genomes by status processed and update to released with pytest.raises(DatasetFactoryException): genome_factory_result = [genome for genome in genome_factory.get_genomes(**genome_filters)][0] - logger.debug(f"Factory Results 3 {genome_factory_result}") + # logger.debug(f"Factory Results 3 {genome_factory_result}") # assert nothing happened in DB with metadata_db.session_scope() as session: dataset = session.query(Dataset).filter(Dataset.dataset_uuid == leaf_uuid).one() - logger.debug(f"Dataset 3 {dataset}") + # logger.debug(f"Dataset 3 {dataset}") assert 
'Processed' == dataset.status.value # TODO complete the test with all sub datasets updated to processed before moving leaf to # release then asses that genebuild is now released @@ -201,3 +201,4 @@ def test_expected_columns_on_update_status(self, genome_factory, expected_column expected_columns.append('updated_dataset_status') returned_columns = list(next(genome_factory.get_genomes(**genome_filters)).keys()) assert returned_columns.sort() == expected_columns.sort() + diff --git a/src/tests/test_grpc_release.py b/src/tests/test_grpc_release.py index 74a8660d..fe0b191b 100644 --- a/src/tests/test_grpc_release.py +++ b/src/tests/test_grpc_release.py @@ -60,13 +60,13 @@ def test_fetch_all_releases(self, release_conn, allow_unreleased, expected_count logger.debug("Results: %s", releases) assert len(releases) == expected_count assert [release.EnsemblSite.name == 'Ensembl' for release in releases] - assert releases[1].EnsemblRelease.label == 'MVP Beta-1' + assert releases[1].EnsemblRelease.label == '2020-10-18' @pytest.mark.parametrize( "allow_unreleased, genome_uuid, release_name", [ - (False, 'a73351f7-93e7-11ec-a39d-005056b38ce3', 'First Beta'), - (True, '75b7ac15-6373-4ad5-9fb7-23813a5355a4', 'MVP Beta-2') + (False, 'a73351f7-93e7-11ec-a39d-005056b38ce3', '2023-06-15'), + (True, '75b7ac15-6373-4ad5-9fb7-23813a5355a4', '2021-10-18') ], indirect=['allow_unreleased'] ) @@ -84,9 +84,10 @@ def test_fetch_releases_for_genome(self, release_conn, allow_unreleased, genome_ "allow_unreleased, dataset_uuid, release_name, release_status", [ (False, '8801edaf-86ec-4799-8fd4-a59077f04c05', None, None), # No release returned is not allowed - (False, '08543d8d-2110-46f3-a9b6-ac58c4af8202', 'MVP Beta-1', 'Released'), # No release returned is not allowed - (True, 'd57040b6-0ef5-4e6b-97ef-be0ad94d3a61', 'MVP Beta-2', 'Prepared'), # Processed Beta-2 - (True, 'd641779c-2add-46ce-acf4-a2b6f15274b1', 'MVP Beta-3', 'Preparing'), # Processed Beta-2 + (False, 
'08543d8d-2110-46f3-a9b6-ac58c4af8202', '2020-10-18', 'Released'), + # No release returned is not allowed + (True, 'd57040b6-0ef5-4e6b-97ef-be0ad94d3a61', '2021-10-18', 'Prepared'), # Processed Beta-2 + (True, 'd641779c-2add-46ce-acf4-a2b6f15274b1', '2022-10-18', 'Preparing'), # Processed Beta-2 ], indirect=['allow_unreleased'] ) diff --git a/src/tests/test_organism_to_organismgroup.py b/src/tests/test_organism_to_organismgroup.py index 102276bf..97eef0c0 100644 --- a/src/tests/test_organism_to_organismgroup.py +++ b/src/tests/test_organism_to_organismgroup.py @@ -9,14 +9,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import re +from collections import namedtuple from pathlib import Path + import pytest -from collections import namedtuple +from ensembl.utils.database import DBConnection + from ensembl.production.metadata.api.models import OrganismGroup, Genome, Organism, OrganismGroupMember -from ensembl.production.metadata.scripts.organism_to_organismgroup import process_genomes, \ - create_or_remove_organism_group -from ensembl.utils.database import UnitTestDB, DBConnection +from ensembl.production.metadata.scripts.organism_to_organismgroup import process_genomes db_directory = Path(__file__).parent / 'databases' db_directory = db_directory.resolve() @@ -77,7 +77,7 @@ def test_add_organismgroup(self, test_dbs, genome_uuids, organism_group_type, or ).join(OrganismGroup, OrganismGroup.organism_group_id == OrganismGroupMember.organism_group_id ).filter( - Genome.genome_uuid.in_([args.genome_uuid]), + Genome.genome_uuid.in_(args.genome_uuid), OrganismGroup.name == args.organism_group_name, ) ) diff --git a/src/tests/test_protobuf_msg_factory.py b/src/tests/test_protobuf_msg_factory.py index 1244b6da..d7dbc686 100644 --- a/src/tests/test_protobuf_msg_factory.py +++ b/src/tests/test_protobuf_msg_factory.py @@ -106,44 +106,25 @@ def 
test_create_stats_by_organism_uuid(self, genome_conn): organism_uuid = "1e579f8d-3880-424e-9b4f-190eb69280d9" input_data = genome_conn.fetch_genome_datasets(organism_uuid=organism_uuid, dataset_type_name="all") - first_expected_stat = { - 'label': 'assembly.accession', - 'name': 'assembly.accession', - 'statisticType': 'string', - 'statisticValue': 'GCA_000005845.2' - } output = json_format.MessageToJson(msg_factory.create_stats_by_genome_uuid(input_data)[0]) - assert json.loads(output)['genomeUuid'] == "a73351f7-93e7-11ec-a39d-005056b38ce3" - # check the first stat info of the first genome_uuid - # print(json.loads(output)['statistics']) - assert json.loads(output)['statistics'][0] == first_expected_stat + output_dict = json.loads(output) - def test_create_top_level_statistics(self, genome_conn): - # ecoli - organism_uuid = "1e579f8d-3880-424e-9b4f-190eb69280d9" - input_data = genome_conn.fetch_genome_datasets(organism_uuid=organism_uuid, dataset_type_name="all") + assert output_dict['genomeUuid'] == "a73351f7-93e7-11ec-a39d-005056b38ce3" + + # Don't assume order - search for the specific statistic + stats = output_dict['statistics'] + assembly_accession_stat = next( + (s for s in stats if s['name'] == 'assembly.accession'), + None + ) - first_expected_stat = { + assert assembly_accession_stat is not None, "assembly.accession statistic not found" + assert assembly_accession_stat == { 'label': 'assembly.accession', 'name': 'assembly.accession', 'statisticType': 'string', 'statisticValue': 'GCA_000005845.2' } - stats_by_genome_uuid = msg_factory.create_stats_by_genome_uuid(input_data) - - output = json_format.MessageToJson( - msg_factory.create_top_level_statistics({ - 'organism_uuid': organism_uuid, - 'stats_by_genome_uuid': stats_by_genome_uuid - }) - ) - output_dict = json.loads(output) - assert 'organismUuid' in output_dict.keys() and 'statsByGenomeUuid' in output_dict.keys() - # These tests are pain in the back - # TODO: find a way to improve this spaghetti - 
assert output_dict["organismUuid"] == "1e579f8d-3880-424e-9b4f-190eb69280d9" - assert output_dict['statsByGenomeUuid'][0]['genomeUuid'] == "a73351f7-93e7-11ec-a39d-005056b38ce3" - assert output_dict['statsByGenomeUuid'][0]['statistics'][0] == first_expected_stat def test_create_genome_sequence(self, genome_conn): input_data = genome_conn.fetch_sequences(genome_uuid="a7335667-93e7-11ec-a39d-005056b38ce3") @@ -196,7 +177,7 @@ def test_create_genome_assembly_sequence_region(self, genome_conn): (False, 108.0, { "releaseVersion": 108.0, "releaseDate": "2023-06-15", - "releaseLabel": "First Beta", + "releaseLabel": "2023-06-15", "releaseType": "partial", "isCurrent": False, "siteName": "Ensembl", @@ -205,8 +186,8 @@ def test_create_genome_assembly_sequence_region(self, genome_conn): }), (False, 110.1, { "releaseVersion": 110.1, - "releaseDate": "2023-10-18", - "releaseLabel": "MVP Beta-1", + "releaseDate": "2020-10-18", + "releaseLabel": "2020-10-18", "releaseType": "partial", "isCurrent": True, "siteName": "Ensembl", @@ -215,8 +196,8 @@ def test_create_genome_assembly_sequence_region(self, genome_conn): }), (True, 110.3, { "releaseVersion": 110.3, - "releaseDate": "Unreleased", - "releaseLabel": "MVP Beta-3", + "releaseDate": "2022-10-18", + "releaseLabel": "2022-10-18", "releaseType": "partial", "isCurrent": False, "siteName": "Ensembl", @@ -267,16 +248,9 @@ def test_create_organisms_group_count(self, genome_conn, expected_count, allow_u "genome_tag, current_only, expected_output", [ # url_name = GRCh38 => homo_sapien 38 - ("GRCh38", True, {'genomeUuid': 'a7335667-93e7-11ec-a39d-005056b38ce3'}), - #Todo: Need to review how genomes are fetched from release version (minor revision) - #genome_select = genome_select.filter(EnsemblRelease.version <= release_version) - #if a genome is assigned to 110.1 & 108.0 and current release version is 110.3 - #the return should be ordered to its genome last release version 110.1 - ("GRCh38", False, {"genomeUuid": 
"a7335667-93e7-11ec-a39d-005056b38ce3"}), - # tol_id = mHomSap1 => homo_sapien 37 - # I randomly picked up this tol_id, probably wrong (biologically speaking) - ("GRCh37", False, {"genomeUuid": "3704ceb1-948d-11ec-a39d-005056b38ce3"}), - # Null + ("grch38", True, {'genomeUuid': 'a7335667-93e7-11ec-a39d-005056b38ce3'}), + ("grch38", False, {"genomeUuid": "a7335667-93e7-11ec-a39d-005056b38ce3"}), + ("grch37", False, {"genomeUuid": "3704ceb1-948d-11ec-a39d-005056b38ce3"}), ("iDontExist", False, {}), ] ) diff --git a/src/tests/test_release_factory.py b/src/tests/test_release_factory.py index 56444403..305a0e6b 100644 --- a/src/tests/test_release_factory.py +++ b/src/tests/test_release_factory.py @@ -10,6 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from datetime import datetime from decimal import Decimal from pathlib import Path @@ -41,21 +42,20 @@ def test_init_release_default(self, test_dbs) -> None: with metadata_db.session_scope() as session: last_release = session.query(EnsemblRelease).order_by(EnsemblRelease.version.desc()).first() expected_version = Decimal("1.0") if last_release is None else last_release.version + Decimal("0.1") + label = "2028-09-11" + date = datetime.strptime(label, "%Y-%m-%d").date() - try: - # Call init_release but don't assert on the returned object - factory.init_release(label=str(expected_version)) - except Exception as e: - pytest.fail(f"Unexpected exception: {e}") + try: + factory.init_release(label=label) + except Exception as e: + pytest.fail(f"Unexpected exception: {e}") - # ✅ Re-fetch in a new session with metadata_db.session_scope() as session: release = session.query(EnsemblRelease).filter(EnsemblRelease.version == expected_version).one_or_none() - assert release is not None, "Release was not inserted into the database" assert release.version == expected_version - assert release.release_date is None # Should allow NULL - assert release.label == 
str(expected_version) # Default label behavior + assert release.release_date == date + assert release.label == label assert release.release_type == "partial" assert release.status == ReleaseStatus.PLANNED diff --git a/src/tests/test_updater.py b/src/tests/test_updater.py index 823ef75b..28d50fb8 100644 --- a/src/tests/test_updater.py +++ b/src/tests/test_updater.py @@ -33,8 +33,7 @@ {'src': Path(__file__).parent / "databases/core_5"}, {'src': Path(__file__).parent / "databases/core_6"}, {'src': Path(__file__).parent / "databases/core_7"}, - {'src': Path(__file__).parent / "databases/core_8"}, - {'src': Path(__file__).parent / "databases/core_9"} + {'src': Path(__file__).parent / "databases/core_8"} ]], indirect=True) class TestUpdater: @@ -42,7 +41,8 @@ class TestUpdater: def test_new_organism(self, test_dbs): test = meta_factory(test_dbs['core_1'].dbc.url, - test_dbs['ensembl_genome_metadata'].dbc.url) + test_dbs['ensembl_genome_metadata'].dbc.url, + test_dbs['ncbi_taxonomy'].dbc.url) test.process_core() # Check for insertion of genome_uuid @@ -68,7 +68,6 @@ def test_new_organism(self, test_dbs): assert organism.genomes[0].genebuild_date == '2023-01' # Test the Assembly assert assembly.accession == 'GCF_1111111123.3' - assert assembly.alt_accession == 'GCA_0000012345.3' # select * from genebuild where version = 999 and name = 'genebuild and label =01 dataset = session.query(Dataset).where( (Dataset.version == 'ENS01') & (Dataset.name == 'genebuild') @@ -101,7 +100,9 @@ def test_new_organism(self, test_dbs): assert count == 1 def test_fail_existing_genome_uuid_no_data(self, test_dbs): - test = meta_factory(test_dbs['core_2'].dbc.url, test_dbs['ensembl_genome_metadata'].dbc.url) + test = meta_factory(test_dbs['core_2'].dbc.url, + test_dbs['ensembl_genome_metadata'].dbc.url, + test_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(MetadataUpdateException) as exif: test.process_core() assert ("Database contains a Genome.genome_uuid, " @@ -109,35 +110,79 @@ def 
test_fail_existing_genome_uuid_no_data(self, test_dbs): "Please remove it from the meta key and resubmit" in str(exif.value)) def test_update_assembly(self, test_dbs): - test = meta_factory(test_dbs['core_3'].dbc.url, test_dbs['ensembl_genome_metadata'].dbc.url) + test = meta_factory(test_dbs['core_3'].dbc.url, + test_dbs['ensembl_genome_metadata'].dbc.url, + test_dbs['ncbi_taxonomy'].dbc.url) test.process_core() + + # Get the genome_uuid that was just inserted + core_3_db = DBConnection(test_dbs['core_3'].dbc.url) + with core_3_db.session_scope() as core_session: + inserted_meta = core_session.query(Meta).filter( + Meta.species_id == "1", + Meta.meta_key == 'genome.genome_uuid' + ).first() + inserted_genome_uuid = inserted_meta.meta_value + metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: - organism = session.query(Organism).where(Organism.biosample_id == 'Jabberwocky').first() - assert organism.scientific_name == 'carol_jabberwocky' - assert organism.genomes[1].assembly.accession == 'weird02' - assert organism.genomes[1].genebuild_version == 'ENS01' - assert organism.genomes[1].genebuild_date == '2024-02' + genome = session.query(Genome).filter( + Genome.genome_uuid == inserted_genome_uuid + ).one() + organism = genome.organism + assert organism.scientific_name == 'carol_jabberwocky' + assert genome.assembly.accession == 'weird02' + assert genome.genebuild_version == 'ENS01' + assert genome.genebuild_date == '2024-02' # def test_update_geneset(self, test_dbs): - test = meta_factory(test_dbs['core_4'].dbc.url, test_dbs['ensembl_genome_metadata'].dbc.url) + # Run the update process + test = meta_factory(test_dbs['core_4'].dbc.url, + test_dbs['ensembl_genome_metadata'].dbc.url, + test_dbs['ncbi_taxonomy'].dbc.url) test.process_core() + + # Get the genome_uuid that was just inserted into core_4 by the process + core_4_db = DBConnection(test_dbs['core_4'].dbc.url) + with core_4_db.session_scope() as 
core_session: + inserted_meta = core_session.query(Meta).filter( + Meta.species_id == "1", + Meta.meta_key == 'genome.genome_uuid' + ).first() + inserted_genome_uuid = inserted_meta.meta_value + + # Now query the metadata database for THIS SPECIFIC genome metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: - dataset = session.query(Dataset).where( - (Dataset.version == "ENS02") & (Dataset.name == 'genebuild') + # Get the genome that was just created + genome = session.query(Genome).filter( + Genome.genome_uuid == inserted_genome_uuid + ).one() + + # Get the genebuild dataset for THIS genome + genebuild_genome_dataset = session.query(GenomeDataset).filter( + GenomeDataset.genome_id == genome.genome_id + ).join(Dataset).filter( + Dataset.name == 'genebuild', + Dataset.version == 'ENS02' ).first() + + assert genebuild_genome_dataset is not None + dataset = genebuild_genome_dataset.dataset + assert dataset is not None assert re.match(".*_core_4", dataset.dataset_source.name) assert dataset.dataset_source.type == "core" assert dataset.dataset_type.name == "genebuild" - assert dataset.genome_datasets[0].genome.genebuild_version == 'ENS02' - assert dataset.genome_datasets[0].genome.genebuild_date == '2023-01' - assert dataset.genome_datasets[0].genome.genome_releases is not None + assert genome.genebuild_version == 'ENS02' + assert genome.genebuild_date == '2023-01' # From core_4 meta table + assert len(genome.genome_releases) > 0 def test_taxonomy_common_name(self, test_dbs): - test = meta_factory(test_dbs['core_5'].dbc.url, test_dbs['ensembl_genome_metadata'].dbc.url) + test = meta_factory(test_dbs['core_5'].dbc.url, + test_dbs['ensembl_genome_metadata'].dbc.url, + test_dbs['ncbi_taxonomy'].dbc.url) test.process_core() metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: @@ -145,55 +190,18 @@ def test_taxonomy_common_name(self, test_dbs): 
assert organism.common_name == 'Sheep' def test_fail_existing_genome_uuid_data_not_match(self, test_dbs): - test = meta_factory(test_dbs['core_6'].dbc.url, test_dbs['ensembl_genome_metadata'].dbc.url) + test = meta_factory(test_dbs['core_6'].dbc.url, + test_dbs['ensembl_genome_metadata'].dbc.url, + test_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(MetadataUpdateException) as exif: test.process_core() assert ("Core database contains a genome.genome_uuid which matches an entry in the meta table. " "The force flag was not specified so the core was not updated." in str(exif.value)) - # def test_update_unreleased_no_force(self, test_dbs): - # test = meta_factory(test_dbs['core_7'].dbc.url, test_dbs['ensembl_genome_metadata'].dbc.url) - # test.process_core() - # metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) - # with metadata_db.session_scope() as session: - # # Check that the old datasets have been removed - # genebuild_test = session.query(Dataset).join(DatasetSource).join(DatasetType).filter( - # DatasetSource.name.like('%core_5'), - # ).filter(DatasetType.name == "genebuild").one_or_none() - # assert genebuild_test is None - # - # count = session.query(DatasetAttribute).join(Attribute).filter( - # Attribute.name == 'genebuild.provider_name', - # DatasetAttribute.value == 'removed_for_test' - # ).count() - # assert count == 0 - # - # # Check that the new dataset are present and not duplicated - # count = session.query(Dataset).join(DatasetSource).join(DatasetType).filter( - # DatasetSource.name.like('%core_7'), - # DatasetType.name == 'assembly' - # ).count() - # assert count == 0 - # count = session.query(Dataset).join(DatasetSource).join(DatasetType).filter( - # DatasetSource.name.like('%core_7'), - # DatasetType.name == 'genebuild' - # ).count() - # assert count == 1 - # # Check that new assembly attribute values are not present - # count = session.query(DatasetAttribute).join(Attribute).filter( - # Attribute.name == 
'assembly.ucsc_alias', - # DatasetAttribute.value == 'test_alias' - # ).count() - # assert count == 0 - # # Check that new genebuild attribute values are present - # count = session.query(DatasetAttribute).join(Attribute).filter( - # Attribute.name == 'genebuild.havana_datafreeze_date', - # DatasetAttribute.value == 'test2' - # ).count() - # assert count > 0 - def test_update_released(self, test_dbs): - test = meta_factory(test_dbs['core_8'].dbc.url, test_dbs['ensembl_genome_metadata'].dbc.url) + test = meta_factory(test_dbs['core_8'].dbc.url, + test_dbs['ensembl_genome_metadata'].dbc.url, + test_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(Exception) as exif: test.process_core() assert ("Existing Organism, Assembly, and Datasets within a release. ") diff --git a/src/tests/test_utils.py b/src/tests/test_utils.py index caaa13ae..4d088a79 100644 --- a/src/tests/test_utils.py +++ b/src/tests/test_utils.py @@ -523,8 +523,8 @@ def test_get_genomes_by_name(self, genome_conn): }, 'release': { 'isCurrent': True, - 'releaseDate': '2023-10-18', - 'releaseLabel': 'MVP Beta-1', + 'releaseDate': '2020-10-18', + 'releaseLabel': '2020-10-18', 'releaseType': 'partial', 'releaseVersion': 110.1, 'siteLabel': 'MVP Ensembl', @@ -562,8 +562,7 @@ def test_get_genomes_by_name_release_unspecified(self, genome_conn): 'ensemblName': 'WBcel235', 'isReference': True, 'level': 'chromosome', - 'name': 'WBcel235', - 'urlName': 'wbcel235' + 'name': 'WBcel235' }, 'attributesInfo': { 'assemblyDate': '2012-12', @@ -594,7 +593,7 @@ def test_get_genomes_by_name_release_unspecified(self, genome_conn): }, 'release': { 'releaseDate': '2023-06-15', - 'releaseLabel': 'First Beta', + 'releaseLabel': '2023-06-15', 'releaseType': 'partial', 'releaseVersion': 108.0, 'siteLabel': 'MVP Ensembl', @@ -642,7 +641,7 @@ def test_get_genomes_by_name_release_unspecified(self, genome_conn): "genome_tag, expected_output", [ # url_name = GRCh38 => homo_sapien 38 - ("GRCh38", {"genomeUuid": 
"a7335667-93e7-11ec-a39d-005056b38ce3"}), + ("grch38", {"genomeUuid": "a7335667-93e7-11ec-a39d-005056b38ce3"}), # Null ("iDontExist", {}), ] From 836cddd35308de2e3467b536d8537e595680f680 Mon Sep 17 00:00:00 2001 From: danielp Date: Thu, 16 Oct 2025 09:23:39 +0100 Subject: [PATCH 05/14] Predeletion commit of sqlite testing Includes three scripts for moving txt/sql files to a mysql server and dumping them back as sqlite. The conftest.py has options for running with the sqlite db A second commit will follow with deletion of all mysql related testing --- src/tests/conftest.py | 149 ++- src/tests/databases/compara_db/genome_db.txt | 36 +- .../ensembl_genome_metadata/assembly.txt | 38 +- .../assembly_sequence.txt | 226 ++--- .../ensembl_genome_metadata/attribute.txt | 208 ++-- .../ensembl_genome_metadata/dataset.txt | 898 +++++++++--------- .../dataset_attribute.txt | 6 +- .../dataset_source.txt | 106 +-- .../ensembl_genome_metadata/dataset_type.txt | 68 +- .../ensembl_release.txt | 12 +- .../ensembl_genome_metadata/genome.txt | 40 +- .../genome_dataset.txt | 2 +- .../ncbi_taxa_name.txt | 6 +- .../ensembl_genome_metadata/table.sql | 489 +++++----- 14 files changed, 1197 insertions(+), 1087 deletions(-) diff --git a/src/tests/conftest.py b/src/tests/conftest.py index e01c9bf2..fa90afcd 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -1,25 +1,12 @@ -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -""" Necessary fixtures for our GRPC API tests """ import os +import shutil +import tempfile from pathlib import Path import pytest import sqlalchemy as db from _pytest.config import Config +from ensembl.utils.database import DBConnection from grpc_reflection.v1alpha import reflection from ensembl.production.metadata.api.adaptors import GenomeAdaptor @@ -30,8 +17,108 @@ from ensembl.production.metadata.grpc import ensembl_metadata_pb2 +def pytest_addoption(parser): + """Add custom command line options.""" + parser.addoption( + "--use-sqlite-dbs", + action="store_true", + default=False, + help="Use pre-converted SQLite .db files instead of creating temporary databases", + ) + + def pytest_configure(config: Config) -> None: - pytest.dbs_dir = Path(__file__).parent / 'databases' + pytest.dbs_dir = Path(__file__).parent / "databases" + + +@pytest.fixture(scope="session") +def use_sqlite_dbs(request): + """Check if we should use SQLite .db files.""" + return request.config.getoption("--use-sqlite-dbs") + + +@pytest.fixture(scope="module") +def test_dbs(request, use_sqlite_dbs): + """ + Test database fixture - supports both SQLite and MySQL. + + With --use-sqlite-dbs: Uses pre-converted .db files (creates temporary copies for isolation) + Without flag: Creates temporary MySQL databases using UnitTestDB + + Note: SQLite databases are copied to temp directories to ensure test isolation. + Changes made during tests won't affect the original .db files. 
+ """ + from ensembl.utils.database import UnitTestDB + + db_configs = request.param if hasattr(request, "param") else [] + test_databases = {} + temp_resources = [] # Track resources for cleanup + + for db_config in db_configs: + src_path = db_config["src"] + db_name = src_path.name + + if use_sqlite_dbs: + # Use pre-converted SQLite .db files with temporary copies for isolation + sqlite_file = src_path.parent / f"{db_name}.db" + + if not sqlite_file.exists(): + raise FileNotFoundError( + f"SQLite database not found: {sqlite_file}\n" + f"Please convert it first using your conversion script." + ) + + # Create temporary copy to ensure test isolation + temp_dir = tempfile.mkdtemp(prefix=f"pytest_{db_name}_") + temp_db_file = Path(temp_dir) / f"{db_name}_test.db" + + print(f"\n>>> Using SQLite database: {sqlite_file}") + print(f" (temporary copy: {temp_db_file})") + + shutil.copy2(sqlite_file, temp_db_file) + + # Create connection to temporary copy + db_url = f"sqlite:///{temp_db_file}" + test_databases[db_name] = type("TestDB", (object,), { + "dbc": DBConnection(db_url), + "drop": lambda: None, # Add no-op drop method for consistency + })() + + temp_resources.append((temp_db_file, temp_dir)) + + else: + # Use MySQL with UnitTestDB (creates temporary databases from dumps) + server_url = request.config.getoption( + "--server", "mysql://ensembl@localhost:3306/?local_infile=1" + ) + print(f"\n>>> Creating temporary MySQL database from: {src_path}") + test_db = UnitTestDB(server_url, dump_dir=src_path, name=db_name) + test_databases[db_name] = test_db + + yield test_databases + + # Cleanup + if use_sqlite_dbs: + # Close SQLite connections and remove temporary files + for db_name, test_db in test_databases.items(): + if hasattr(test_db.dbc, 'dispose'): + test_db.dbc.dispose() + + # Remove temporary files and directories + for temp_file, temp_dir in temp_resources: + try: + if temp_file.exists(): + temp_file.unlink() + if Path(temp_dir).exists(): + shutil.rmtree(temp_dir) + 
print(f">>> Cleaned up temporary SQLite copy: {temp_dir}") + except Exception as e: + print(f"Warning: Failed to cleanup {temp_dir}: {e}") + else: + # Drop temporary MySQL databases + for db_name, test_db in test_databases.items(): + if hasattr(test_db, "drop"): + test_db.drop() @pytest.fixture(scope="module", autouse=True) @@ -45,16 +132,14 @@ def engine(test_dbs): def genome_conn(test_dbs): genome_conn = GenomeAdaptor( metadata_uri=test_dbs["ensembl_genome_metadata"].dbc.url, - taxonomy_uri=test_dbs["ncbi_taxonomy"].dbc.url + taxonomy_uri=test_dbs["ncbi_taxonomy"].dbc.url, ) yield genome_conn + @pytest.fixture(scope="function") def vep_conn(test_dbs): - vep_conn = VepAdaptor( - metadata_uri=test_dbs["ensembl_genome_metadata"].dbc.url, - file="all" - ) + vep_conn = VepAdaptor(metadata_uri=test_dbs["ensembl_genome_metadata"].dbc.url, file="all") yield vep_conn @@ -62,15 +147,14 @@ def vep_conn(test_dbs): def allow_unreleased(request): """Set ALLOWED_UNRELEASED environment variable, this fixture must be used with `parametrize`""" from ensembl.production.metadata.grpc.config import cfg + cfg.allow_unreleased = request.param yield cfg @pytest.fixture(scope="class") def release_conn(test_dbs): - release_conn = ReleaseAdaptor( - metadata_uri=test_dbs["ensembl_genome_metadata"].dbc.url - ) + release_conn = ReleaseAdaptor(metadata_uri=test_dbs["ensembl_genome_metadata"].dbc.url) yield release_conn @@ -84,25 +168,28 @@ def dataset_factory(test_dbs): yield DatasetFactory(test_dbs["ensembl_genome_metadata"].dbc.url) -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def grpc_add_to_server(): - from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import add_EnsemblMetadataServicer_to_server + from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import ( + add_EnsemblMetadataServicer_to_server, + ) return add_EnsemblMetadataServicer_to_server -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def grpc_servicer(test_dbs, 
engine): from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer + return EnsemblMetadataServicer() -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def grpc_server(_grpc_server, grpc_addr, grpc_add_to_server, grpc_servicer): grpc_add_to_server(grpc_servicer, _grpc_server) SERVICE_NAMES = ( - ensembl_metadata_pb2.DESCRIPTOR.services_by_name['EnsemblMetadata'].full_name, - reflection.SERVICE_NAME + ensembl_metadata_pb2.DESCRIPTOR.services_by_name["EnsemblMetadata"].full_name, + reflection.SERVICE_NAME, ) reflection.enable_server_reflection(SERVICE_NAMES, _grpc_server) _grpc_server.add_insecure_port(grpc_addr) diff --git a/src/tests/databases/compara_db/genome_db.txt b/src/tests/databases/compara_db/genome_db.txt index a5e212c7..f4012ca9 100644 --- a/src/tests/databases/compara_db/genome_db.txt +++ b/src/tests/databases/compara_db/genome_db.txt @@ -1,18 +1,18 @@ -47 6239 caenorhabditis_elegans WBcel235 2014-10 \N strain N2 Caenorhabditis elegans (Nematode, N2) \N 110 \N -30 511145 escherichia_coli_str_k_12_substr_mg1655 ASM584v2 2018-09 \N strain K-12 substr. MG1655 Escherichia coli str. K-12 substr. MG1655 str. 
K12 (GCA_000005845) \N 110 \N -12 9606 homo_sapiens GRCh38 2023-03 \N \N Human \N 110 \N -11 9606 homo_sapiens_37 GRCh37 2013-09 \N \N Human \N 110 \N -23 9606 homo_sapiens_gca018469415v1 HG03516.alt.pat.f1_v2 2022-07 \N population Esan in Nigeria Homo sapiens (Human) - GCA_018469415.1 \N 110 \N -24 9606 homo_sapiens_gca018469425v1 HG03516.pri.mat.f1_v2 2022-07 \N population Esan in Nigeria Homo sapiens (Human) - GCA_018469425.1 \N 110 \N -31 9606 homo_sapiens_gca018469875v1 HG02622.pri.mat.f1_v2 2022-07 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018469875.1 \N 110 \N -32 9606 homo_sapiens_gca018469925v1 HG02622.alt.pat.f1_v2 2022-07 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018469925.1 \N 110 \N -19 9606 homo_sapiens_gca018473295v1 HG03540.pri.mat.f1_v2 2022-08 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018473295.1 \N 110 \N -21 9606 homo_sapiens_gca018473315v1 HG03540.alt.pat.f1_v2 2022-08 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018473315.1 \N 110 \N -42 9606 homo_sapiens_gca018505825v1 HG02109.pri.mat.f1_v2 2022-07 \N population African from Barbados Homo sapiens (Human) - GCA_018505825.1 \N 110 \N -46 9606 homo_sapiens_gca018505865v1 HG02109.alt.pat.f1_v2 2022-07 \N population African from Barbados Homo sapiens (Human) - GCA_018505865.1 \N 110 \N -6 9606 homo_sapiens_gca018852605v1 HG002.alt.pat.f1_v2 2022-07 \N population European Homo sapiens (Human) - GCA_018852605.1 \N 110 \N -7 9606 homo_sapiens_gca018852615v1 HG002.pri.mat.f1_v2 2022-07 \N population European Homo sapiens (Human) - GCA_018852615.1 \N 110 \N -8 9606 homo_sapiens_gca021950905v1 HG002.pat.cur.20211005 2022-07 \N population European Homo sapiens (Human) - GCA_021950905.1 \N 110 \N -9 9606 homo_sapiens_gca021951015v1 HG002.mat.cur.20211005 2022-07 \N population European Homo sapiens (Human) - GCA_021951015.1 \N 110 \N -16 559292 saccharomyces_cerevisiae R64-1-1 2018-10 \N strain S288C 
Saccharomyces cerevisiae \N 110 \N -20 4565 triticum_aestivum IWGSC 2018-04-IWGSC \N cultivar Chinese Spring Triticum aestivum \N 110 \N +47 6239 caenorhabditis_elegans WBcel235 2014-10 0 0 \N strain N2 Caenorhabditis elegans (Nematode, N2) \N 110 \N +30 511145 escherichia_coli_str_k_12_substr_mg1655 ASM584v2 2018-09 0 0 \N strain K-12 substr. MG1655 Escherichia coli str. K-12 substr. MG1655 str. K12 (GCA_000005845) \N 110 \N +12 9606 homo_sapiens GRCh38 2023-03 0 0 \N \N Human \N 110 \N +11 9606 homo_sapiens_37 GRCh37 2013-09 0 0 \N \N Human \N 110 \N +23 9606 homo_sapiens_gca018469415v1 HG03516.alt.pat.f1_v2 2022-07 0 0 \N population Esan in Nigeria Homo sapiens (Human) - GCA_018469415.1 \N 110 \N +24 9606 homo_sapiens_gca018469425v1 HG03516.pri.mat.f1_v2 2022-07 0 0 \N population Esan in Nigeria Homo sapiens (Human) - GCA_018469425.1 \N 110 \N +31 9606 homo_sapiens_gca018469875v1 HG02622.pri.mat.f1_v2 2022-07 0 0 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018469875.1 \N 110 \N +32 9606 homo_sapiens_gca018469925v1 HG02622.alt.pat.f1_v2 2022-07 0 0 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018469925.1 \N 110 \N +19 9606 homo_sapiens_gca018473295v1 HG03540.pri.mat.f1_v2 2022-08 0 0 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018473295.1 \N 110 \N +21 9606 homo_sapiens_gca018473315v1 HG03540.alt.pat.f1_v2 2022-08 0 0 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018473315.1 \N 110 \N +42 9606 homo_sapiens_gca018505825v1 HG02109.pri.mat.f1_v2 2022-07 0 0 \N population African from Barbados Homo sapiens (Human) - GCA_018505825.1 \N 110 \N +46 9606 homo_sapiens_gca018505865v1 HG02109.alt.pat.f1_v2 2022-07 0 0 \N population African from Barbados Homo sapiens (Human) - GCA_018505865.1 \N 110 \N +6 9606 homo_sapiens_gca018852605v1 HG002.alt.pat.f1_v2 2022-07 0 0 \N population European Homo sapiens (Human) - GCA_018852605.1 \N 110 \N +7 9606 homo_sapiens_gca018852615v1 
HG002.pri.mat.f1_v2 2022-07 0 0 \N population European Homo sapiens (Human) - GCA_018852615.1 \N 110 \N +8 9606 homo_sapiens_gca021950905v1 HG002.pat.cur.20211005 2022-07 0 0 \N population European Homo sapiens (Human) - GCA_021950905.1 \N 110 \N +9 9606 homo_sapiens_gca021951015v1 HG002.mat.cur.20211005 2022-07 0 0 \N population European Homo sapiens (Human) - GCA_021951015.1 \N 110 \N +16 559292 saccharomyces_cerevisiae R64-1-1 2018-10 0 0 \N strain S288C Saccharomyces cerevisiae \N 110 \N +20 4565 triticum_aestivum IWGSC 2018-04-IWGSC 0 0 \N cultivar Chinese Spring Triticum aestivum \N 110 \N \ No newline at end of file diff --git a/src/tests/databases/ensembl_genome_metadata/assembly.txt b/src/tests/databases/ensembl_genome_metadata/assembly.txt index 084db5df..573ea4fe 100644 --- a/src/tests/databases/ensembl_genome_metadata/assembly.txt +++ b/src/tests/databases/ensembl_genome_metadata/assembly.txt @@ -1,19 +1,19 @@ -1 \N GCA_000005845.2 chromosome ASM584v2 \N ASM584v2 \N 2023-09-22 15:01:43.000000 ASM584v2 \N 532aa68f-6500-404e-a470-8afb718a770a 1 asm584v2 -4 \N GCA_018473315.1 primary_assembly HG03540.alt.pat.f1_v2 \N HG03540.alt.pat.f1_v2 \N 2023-09-22 15:02:00.000000 HG03540.alt.pat.f1_v2 \N 9d2dc346-358a-4c70-8fd8-3ff194246a76 0 \N -5 \N GCA_018469415.1 primary_assembly HG03516.alt.pat.f1_v2 \N HG03516.alt.pat.f1_v2 \N 2023-09-22 15:02:01.000000 HG03516.alt.pat.f1_v2 \N 1551e511-bde7-40cf-95cd-de4059678c6f 0 \N -6 \N GCA_018469875.1 primary_assembly HG02622.pri.mat.f1_v2 \N HG02622.pri.mat.f1_v2 \N 2023-09-22 15:02:02.000000 HG02622.pri.mat.f1_v2 \N 960de156-eced-4916-ac64-263d9a89dc3b 0 \N -7 \N GCA_018505825.1 primary_assembly HG02109.pri.mat.f1_v2 \N HG02109.pri.mat.f1_v2 \N 2023-09-22 15:02:04.000000 HG02109.pri.mat.f1_v2 \N fc4e0ec5-7230-44b9-92aa-6788356158a8 0 \N -9 \N GCA_018852615.1 primary_assembly HG002.pri.mat.f1_v2 \N HG002.pri.mat.f1_v2 \N 2023-09-22 15:02:11.000000 HG002.pri.mat.f1_v2 \N 96b3f68d-d3d2-4107-a003-39cb0d67075f 0 \N -15 \N 
GCA_000002765.2 chromosome ASM276v2 \N ASM276v2 \N 2023-09-22 15:03:01.000000 ASM276v2 \N 23d2caa4-5120-4cc7-a73a-42aad4b6b1d9 1 asm276v2 -18 \N GCA_021950905.1 primary_assembly HG002.pat.cur.20211005 \N HG002.pat.cur.20211005 \N 2023-09-22 15:03:01.000000 HG002.pat.cur.20211005 \N 7a191f4e-0840-4aed-9302-8fab1157a361 0 \N -40 hg19 GCA_000001405.14 chromosome GRCh37.p13 \N GRCh37 \N 2023-09-22 15:03:21.000000 GRCh37.p13 \N 9d6b239c-46dd-4c79-bc29-1089f348d31d 0 grch37 -79 \N GCA_900519105.1 chromosome IWGSC \N IWGSC \N 2023-09-22 15:04:29.000000 IWGSC \N 36d6c4f3-8072-4ae3-a485-84a070e725e3 1 iwgsc -92 hg38 GCA_000001405.29 chromosome GRCh38.p14 \N GRCh38 \N 2023-09-22 15:04:45.000000 GRCh38.p14 \N fd7fea38-981a-4d73-a879-6f9daef86f08 1 grch38 -97 \N GCA_018505865.1 primary_assembly HG02109.alt.pat.f1_v2 \N HG02109.alt.pat.f1_v2 \N 2023-09-22 15:04:50.000000 HG02109.alt.pat.f1_v2 \N 373c34c3-d482-4ebb-8f48-baee7c548583 0 \N -100 \N GCA_018852605.1 primary_assembly HG002.alt.pat.f1_v2 \N HG002.alt.pat.f1_v2 \N 2023-09-22 15:04:53.000000 HG002.alt.pat.f1_v2 \N b6883d52-cc9d-43d2-bdbd-97bdaf903cec 0 \N -107 \N GCA_018469925.1 primary_assembly HG02622.alt.pat.f1_v2 \N HG02622.alt.pat.f1_v2 \N 2023-09-22 15:04:56.000000 HG02622.alt.pat.f1_v2 \N a283efd6-d125-47df-8b3c-4757ae496231 0 \N -135 \N GCA_018469425.1 primary_assembly HG03516.pri.mat.f1_v2 \N HG03516.pri.mat.f1_v2 \N 2023-09-22 15:05:37.000000 HG03516.pri.mat.f1_v2 \N 93f7cd36-49e3-4c89-826b-3b2e4be0c40a 0 \N -180 \N GCA_021951015.1 primary_assembly HG002.mat.cur.20211005 \N HG002.mat.cur.20211005 \N 2023-09-22 15:06:39.000000 HG002.mat.cur.20211005 \N 696aa33e-f239-460e-9fcc-b6bb6908d726 0 \N -186 \N GCA_018473295.1 primary_assembly HG03540.pri.mat.f1_v2 \N HG03540.pri.mat.f1_v2 \N 2023-09-22 15:06:43.000000 HG03540.pri.mat.f1_v2 \N 8c71dc33-a49f-4be3-a3ad-4404fb374344 0 \N -216 \N GCA_000146045.2 chromosome R64-1-1 \N R64-1-1 \N 2023-09-22 15:06:55.000000 R64-1-1 \N 86cb493f-57cf-4c5a-8358-ef69952baf03 1 
r64-1-1 -219 \N GCA_000002985.3 chromosome WBcel235 \N WBcel235 \N 2023-09-22 15:06:58.000000 WBcel235 \N 2598e56f-a579-4fec-9525-0939563056bd 1 wbcel235 +1 \N GCA_000005845.2 chromosome ASM584v2 \N ASM584v2 \N 2023-09-22 15:01:43 ASM584v2 532aa68f-6500-404e-a470-8afb718a770a 1 +4 \N GCA_018473315.1 primary_assembly HG03540.alt.pat.f1_v2 \N HG03540.alt.pat.f1_v2 \N 2023-09-22 15:02:00 HG03540.alt.pat.f1_v2 9d2dc346-358a-4c70-8fd8-3ff194246a76 0 +5 \N GCA_018469415.1 primary_assembly HG03516.alt.pat.f1_v2 \N HG03516.alt.pat.f1_v2 \N 2023-09-22 15:02:01 HG03516.alt.pat.f1_v2 1551e511-bde7-40cf-95cd-de4059678c6f 0 +6 \N GCA_018469875.1 primary_assembly HG02622.pri.mat.f1_v2 \N HG02622.pri.mat.f1_v2 \N 2023-09-22 15:02:02 HG02622.pri.mat.f1_v2 960de156-eced-4916-ac64-263d9a89dc3b 0 +7 \N GCA_018505825.1 primary_assembly HG02109.pri.mat.f1_v2 \N HG02109.pri.mat.f1_v2 \N 2023-09-22 15:02:04 HG02109.pri.mat.f1_v2 fc4e0ec5-7230-44b9-92aa-6788356158a8 0 +9 \N GCA_018852615.1 primary_assembly HG002.pri.mat.f1_v2 \N HG002.pri.mat.f1_v2 \N 2023-09-22 15:02:11 HG002.pri.mat.f1_v2 96b3f68d-d3d2-4107-a003-39cb0d67075f 0 +15 \N GCA_000002765.2 chromosome ASM276v2 \N ASM276v2 \N 2023-09-22 15:03:01 ASM276v2 23d2caa4-5120-4cc7-a73a-42aad4b6b1d9 1 +18 \N GCA_021950905.1 primary_assembly HG002.pat.cur.20211005 \N HG002.pat.cur.20211005 \N 2023-09-22 15:03:01 HG002.pat.cur.20211005 7a191f4e-0840-4aed-9302-8fab1157a361 0 +40 hg19 GCA_000001405.14 chromosome GRCh37.p13 \N GRCh37 \N 2023-09-22 15:03:21 GRCh37.p13 9d6b239c-46dd-4c79-bc29-1089f348d31d 0 +79 \N GCA_900519105.1 chromosome IWGSC \N IWGSC \N 2023-09-22 15:04:29 IWGSC 36d6c4f3-8072-4ae3-a485-84a070e725e3 1 +92 hg38 GCA_000001405.29 chromosome GRCh38.p14 \N GRCh38 \N 2023-09-22 15:04:45 GRCh38.p14 fd7fea38-981a-4d73-a879-6f9daef86f08 1 +97 \N GCA_018505865.1 primary_assembly HG02109.alt.pat.f1_v2 \N HG02109.alt.pat.f1_v2 \N 2023-09-22 15:04:50 HG02109.alt.pat.f1_v2 373c34c3-d482-4ebb-8f48-baee7c548583 0 +100 \N GCA_018852605.1 
primary_assembly HG002.alt.pat.f1_v2 \N HG002.alt.pat.f1_v2 \N 2023-09-22 15:04:53 HG002.alt.pat.f1_v2 b6883d52-cc9d-43d2-bdbd-97bdaf903cec 0 +107 \N GCA_018469925.1 primary_assembly HG02622.alt.pat.f1_v2 \N HG02622.alt.pat.f1_v2 \N 2023-09-22 15:04:56 HG02622.alt.pat.f1_v2 a283efd6-d125-47df-8b3c-4757ae496231 0 +135 \N GCA_018469425.1 primary_assembly HG03516.pri.mat.f1_v2 \N HG03516.pri.mat.f1_v2 \N 2023-09-22 15:05:37 HG03516.pri.mat.f1_v2 93f7cd36-49e3-4c89-826b-3b2e4be0c40a 0 +180 \N GCA_021951015.1 primary_assembly HG002.mat.cur.20211005 \N HG002.mat.cur.20211005 \N 2023-09-22 15:06:39 HG002.mat.cur.20211005 696aa33e-f239-460e-9fcc-b6bb6908d726 0 +186 \N GCA_018473295.1 primary_assembly HG03540.pri.mat.f1_v2 \N HG03540.pri.mat.f1_v2 \N 2023-09-22 15:06:43 HG03540.pri.mat.f1_v2 8c71dc33-a49f-4be3-a3ad-4404fb374344 0 +216 \N GCA_000146045.2 chromosome R64-1-1 \N R64-1-1 \N 2023-09-22 15:06:55 R64-1-1 86cb493f-57cf-4c5a-8358-ef69952baf03 1 +219 \N GCA_000002985.3 chromosome WBcel235 \N WBcel235 \N 2023-09-22 15:06:58 WBcel235 2598e56f-a579-4fec-9525-0939563056bd 1 diff --git a/src/tests/databases/ensembl_genome_metadata/assembly_sequence.txt b/src/tests/databases/ensembl_genome_metadata/assembly_sequence.txt index d89dd85e..a43b36a6 100644 --- a/src/tests/databases/ensembl_genome_metadata/assembly_sequence.txt +++ b/src/tests/databases/ensembl_genome_metadata/assembly_sequence.txt @@ -1,113 +1,113 @@ -1871 1 1 1 249250621 SO:0000738 1b22b98cdeb4a9304cb5d48026a85128 40 1 S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU 0 -1872 10 10 1 135534747 SO:0000738 988c28e000e84c26d552359af1ea2e1d 40 10 -BOZ8Esn8J88qDwNiSEwUr5425UXdiGX 0 -1873 11 11 1 135006516 SO:0000738 98c59049a2df285c76ffb1c6db8f8b96 40 11 XXi2_O1ly-CCOi3HP5TypAw7LtC6niFG 0 -1874 12 12 1 133851895 SO:0000738 51851ac0e1a115847ad36449b0015864 40 12 105bBysLoDFQHhajooTAUyUkNiZ8LJEH 0 -1875 13 13 1 115169878 SO:0000738 283f8d7892baa81b510a015719ca7b0b 40 13 Ewb9qlgTqN6e_XQiRVYpoUfZJHXeiUfH 0 -1876 14 14 1 107349540 
SO:0000738 98f3cae32b2a2e9524bc19813927542e 40 14 5Ji6FGEKfejK1U6BMScqrdKJK8GqmIGf 0 -1877 15 15 1 102531392 SO:0000738 e5645a794a8238215b2cd77acb95a078 40 15 zIMZb3Ft7RdWa5XYq0PxIlezLY2ccCgt 0 -1878 16 16 1 90354753 SO:0000738 fc9b1a7b42b97a864f56b348b06095e6 40 16 W6wLoIFOn4G7cjopxPxYNk2lcEqhLQFb 0 -1879 17 17 1 81195210 SO:0000738 351f64d4f4f9ddd45b35336ad97aa6de 40 17 AjWXsI7AkTK35XW9pgd3UbjpC3MAevlz 0 -1880 18 18 1 78077248 SO:0000738 b15d4b2d29dde9d3e4f93d1d0f2cbc9c 40 18 BTj4BDaaHYoPhD3oY2GdwC_l0uqZ92UD 0 -1893 GL000191.1 GL000191.1 0 106433 SO:0000738 d75b436f50a8214ee9c2a51d30b2c2cc 40 \N aX54PSRCZbj0EVn5QAH4zoO72gsmSTO8 0 -1894 GL000192.1 GL000192.1 0 547496 SO:0000738 325ba9e808f669dfeee210fdd7b470ac 40 \N udMJQEKIyWPe8YGW1Dws6IHk_1NbkY9Y 0 -1895 GL000193.1 GL000193.1 0 189789 SO:0000738 dbb6e8ece0b5de29da56601613007c2a 40 \N be3_RQlT0dXc4jYLkbEiRC6HSl7u1FjF 0 -1896 GL000194.1 GL000194.1 0 191469 SO:0000738 6ac8f815bf8e845bb3031b73f812c012 40 \N WyYCLC4VxJvbBz2b_wBWF5BdQotiUVdB 0 -1897 GL000195.1 GL000195.1 0 182896 SO:0000738 5d9ec007868d517e73543b005ba48535 40 \N 2LEWMcieZGf9Sx4VpEeWSDcULUVHGm0w 0 -1955 1 1 1 248956422 SO:0000738 2648ae1bacce4ec4b6cf337dcae37816 92 1 2YnepKM7OkBoOrKmvHbGqguVfF9amCST 0 -1956 10 10 1 133797422 SO:0000738 907112d17fcb73bcab1ed1c72b97ce68 92 10 P6q4sxSkFfKZpUgEwW73rx2a2ZYY-_pH 0 -1957 11 11 1 135086622 SO:0000738 1511375dc2dd1b633af8cf439ae90cec 92 11 2NkFm8HK88MqeNkCgj78KidCAXgnsfV1 0 -1958 12 12 1 133275309 SO:0000738 e81e16d3f44337034695a29b97708fce 92 12 7dzBrNZj_CM_Dg7zLl--e18KI8wVUxEd 0 -1959 13 13 1 114364328 SO:0000738 17dab79b963ccd8e7377cef59a54fe1c 92 13 0qw_sn8Cl7OmMTFlukjFD2DUejW0T80Y 0 -1960 14 14 1 107043718 SO:0000738 acbd9552c059d9b403e75ed26c1ce5bc 92 14 eK4D2MosgK_ivBkgi6FVPg5UXs1bYESm 0 -1961 15 15 1 101991189 SO:0000738 f036bd11158407596ca6bf3581454706 92 15 AsXvWL1-2i5U_buw6_niVIxD6zTbAuS6 0 -1962 16 16 1 90338345 SO:0000738 24e7cabfba3548a2bb4dff582b9ee870 92 16 EjrUp_S2oCd2b_SdqeZeOYUkEC966iVh 0 
-1963 17 17 1 83257441 SO:0000738 a8499ca51d6fb77332c2d242923994eb 92 17 upqChCoU-Gtd_61IidCsln-r8cxUTFeP 0 -1964 18 18 1 80373285 SO:0000738 11eeaa801f6b0e2e36a1138616b8ee9a 92 18 vWwFhJ5lQDMhh-czg06YtlWqu0lvFAZV 0 -53919 1 1 1 640851 SO:0000738 46d861cab72441c63589339b36e644ac 15 1 JfN51lUFS8sH2f2-OQ58B1f-ZFmxtjEO 0 -53920 10 10 1 1687656 SO:0000738 7f3f93983d66669bf5e18266f2565aa4 15 10 nv-h1XrxCbnvlyAlLn612PX8kJBmpf9m 0 -53921 11 11 1 2038340 SO:0000738 3733d85e3f9fd8c5284e70dc977950b7 15 11 GEeKuO07bJBfzpd4KKpQPasLKNWpBia6 0 -53922 12 12 1 2271494 SO:0000738 81d7ade8026e3099dc4a595a27ce5fe4 15 12 qoUuu7OZJT_jdUqwbk_gk-yvozwqFNdQ 0 -53923 13 13 1 2925236 SO:0000738 2e5f27c4aa13202464e20c520a90bddc 15 13 HuuhxwpHdyAZqDG3ex62hIuMZ-FHd5Io 0 -53924 14 14 1 3291936 SO:0000738 2bfeb85b8b4486aa4b03c3cb4dc56dad 15 14 mJezSE0gh0LS5XVFlfR6PZIRw3-eLyAR 0 -53925 2 2 1 947102 SO:0000738 3264ffcaf0fb7e7c6adf14088c866886 15 2 tpNy-HkCz6Cq62euyOlWNduv91huwg7p 0 -53926 3 3 1 1067971 SO:0000738 1a2a92569065a83856362480f86aaf17 15 3 YqknWYibUjSC4bFoJwSTAcXKBC4uld3x 0 -53927 4 4 1 1200490 SO:0000738 3bb29def1493995037b9426acc5cdc20 15 4 jETGmEu2CRUEy_e3SK9zDfszR9-a9UmS 0 -53928 5 5 1 1343557 SO:0000738 bf203f82beeabc7337d1dcb50bca14cf 15 5 6UHvnqsyEKDTHAzF6FZ0OHgDVVeQ6AZv 0 -53933 I I 1 15072434 SO:0000738 185711aa389cf8d9302ad2ab07bd31e0 219 1 jXmB22vaK688X3rr2HPNzTNxnkQuCTgF 0 -53934 II II 1 15279421 SO:0000738 9e7e67d1e51cdb31791deab89dc31550 219 2 7k5F0INpuaUarMm8nyI6k0nSw46G65xC 0 -53935 III III 1 13783801 SO:0000738 c0f1a58d2bf6ff6a16617839bbc5fe52 219 3 -CzkjDYLb3a8V06zz7_pT3JxsxxGsQO0 0 -53936 IV IV 1 17493829 SO:0000738 2156ab555e19afd8a0ca5aba82fb2a2a 219 4 0Tm-TKE2JHxrM9avtCG0-9cTEnqpfw2b 0 -53937 MtDNA MtDNA 1 13794 SO:0000737 199e147d502d88e45047413dc83c039c 219 7 L1L6FO8ESIZJBNTOTPK7GnZvJYiewKK0 0 -53938 V V 1 20924180 SO:0000738 ffaf018f42f95375d2af6dcd402fef69 219 5 3ID7cGPgmNoJWDTn6-At5Geri2oVP6Rv 0 -53939 X X 1 17718942 SO:0000738 dae3e9ec047e8147337b550dd8564d0b 
219 6 mmZ9kKbTbAEyf09gNqHPQW4fZj8YiuTx 0 -60019 Chromosome Chromosome 1 4641652 SO:0000738 482a2b04485ec8c4b5f4eaba2c2002da 1 7 NjjHtoQ2gYdy2RVkAZBKURBiV7xp-8ZS 0 -60020 I I 1 230218 SO:0000738 6681ac2f62509cfc220d78751b8dc524 216 1 lZyxiD_ByprhOUzrR1o1bq0ezO_1gkrn 0 -60021 II II 1 813184 SO:0000738 97a317c689cbdd7e92a5c159acd290d2 216 2 vw8jTiV5SAPDH4TEIZhNGylzNsQM4NC9 0 -60022 III III 1 316620 SO:0000738 54f4a74aa6392d9e19b82c38aa8ab345 216 3 A_i2Id0FjBI-tQyU4ZaCEdxRzQheDevn 0 -60023 IV IV 1 1531933 SO:0000738 74180788027e20df3de53dcb2367d9e3 216 4 QXSUMoZW_SSsCCN9_wc-xmubKQSOn3Qb 0 -60024 IX IX 1 439888 SO:0000738 4eae53ae7b2029b7e1075461c3eb9aac 216 9 hb1scjdCWL89PtAkR0AVH9-dNH5R0FsN 0 -60025 Mito Mito 1 85779 SO:0000737 71c39cf065b8d574f636b654c274cf1b 216 17 DrOlaWEY9iBBQrAAvbrzXsQlzZRV2J59 0 -60026 V V 1 576874 SO:0000738 d2787193198c8d260f58f2097f9e1e39 216 5 UN_b-wij0EtsgFqQ2xNsbXs_GYQQIbeQ 0 -60027 VI VI 1 270161 SO:0000738 b7ebc601f9a7df2e1ec5863deeae88a3 216 6 z-qJgWoacRBV77zcMgZN9E_utrdzmQsH 0 -60028 VII VII 1 1090940 SO:0000738 a308c7ebf0b67c4926bc190dc4ba8ed8 216 7 9wkqGXgK6bvM0gcjBiTDk9tAaqOZojlR 0 -60029 VIII VIII 1 562643 SO:0000738 f66a4f8eef89fc3c3a393fe0210169f1 216 8 K8ln7Ygob_lcVjNh-C8kUydzZjRt3UDf 0 -3725167 JAGYYT010000001.1 JAGYYT010000001.1 0 46592869 SO:0000738 3ba11daa61cfe7d6244960d4bcc55113 5 \N lkW5j6Yeu6GL6xusZQCvq7KqO-3KGeh3 0 -3725168 JAGYYT010000002.1 JAGYYT010000002.1 0 55482364 SO:0000738 bac357b106d364f8dabc169545765f92 5 \N iajXIfPoEJDR8BdUaRcI6LhzyZmgtXoA 0 -3725169 JAGYYT010000003.1 JAGYYT010000003.1 0 24607739 SO:0000738 4ff9f520e63b14b64aede25a070baea9 5 \N CMg2W54uVjBnLWwm3xjUnBvyEXCvxyQh 0 -3725170 JAGYYT010000004.1 JAGYYT010000004.1 0 10690193 SO:0000738 361cc6768f00f6bbb45ff12756c76cd1 5 \N I7OKvWCdpSHkxh_LZv3bdpb5sLcX_3IB 0 -3725171 JAGYYT010000005.1 JAGYYT010000005.1 0 28045165 SO:0000738 725d218bfe0ce15239206f78f5604781 5 \N EdEqUHN3H05E-RlSmWi9SySm8JB90XzN 0 -3725172 JAGYYT010000006.1 JAGYYT010000006.1 0 158663023 
SO:0000738 514c27535b3840a2a2ee405f08cb3446 5 \N vZtOo2P9FiBega_X8LSl_0xj4dmJZmMs 0 -3725173 JAGYYT010000007.1 JAGYYT010000007.1 0 46627313 SO:0000738 55301b7468e9cbc4d73a9253d752b652 5 \N HdIELGLdm6TPwPbrHfRlu_i2Nbs5w8Dp 0 -3725174 JAGYYT010000008.1 JAGYYT010000008.1 0 1975142 SO:0000738 a54f58b59c6061d0c44273897a7c148e 5 \N VLql4yEZYIlHmzeHQLmlVrwou_7rBDxu 0 -3725175 JAGYYT010000009.1 JAGYYT010000009.1 0 118296892 SO:0000738 ac1cbd267c1a1ce0eef59afd7fda6047 5 \N fTc-MuSRIDKYqLW4zFh7D9SR3UeAQ7tC 0 -3725176 JAGYYT010000010.1 JAGYYT010000010.1 0 8989820 SO:0000738 bbab8e1c1a52042b195975018ff7271e 5 \N NRaeEfjaqeSr3XXxJss_Az8kfYEiJGUA 0 -3727869 JAHAON010000001.1 JAHAON010000001.1 0 108267787 SO:0000738 2bfee5eba76ddf72a9ce1fe104dfb73a 6 \N nK5v5CSf3RTvSj3ynps2rwV9Qlwtq2XP 0 -3727870 JAHAON010000002.1 JAHAON010000002.1 0 102298096 SO:0000738 136adb262594356fde2be5bf32d091ea 6 \N wn703GYvypvgDg3Nwmg-MI6xqflTex53 0 -3727871 JAHAON010000003.1 JAHAON010000003.1 0 28141884 SO:0000738 d2044e19e173fc0af1271a71a20c19e7 6 \N eoWhZT0jMNBS9d50Sg02HkWe2TJA-Rga 0 -3727872 JAHAON010000004.1 JAHAON010000004.1 0 40727531 SO:0000738 c7a23b01e734c6b22fdd078e97c6f1da 6 \N 2klgq3Y-GrPMLPHsGfkeE7lMuAjbnaxe 0 -3727873 JAHAON010000005.1 JAHAON010000005.1 0 111718856 SO:0000738 c3acd9fa5d4a02da10007a9b71d49f0d 6 \N 4h8XGzCJ6_JvovqGcLZ4HGz-WMOcnfwY 0 -3727874 JAHAON010000006.1 JAHAON010000006.1 0 89895720 SO:0000738 c24fc746780a67eba106ec07563849f9 6 \N jSJ3PqRBMXsYUlSdMZUOagnljy_QRUFJ 0 -3727875 JAHAON010000007.1 JAHAON010000007.1 0 39819246 SO:0000738 08a128df8dd3c67aa90fe81490cf9a80 6 \N Ao235-Ye0mxGwEwvzEo94ejs8Gk2l72e 0 -3727876 JAHAON010000008.1 JAHAON010000008.1 0 32367248 SO:0000738 b7fbc0bd6188d59f429e25ee07ab0e8d 6 \N mSJYKw654SvQGfz9HKxNvDP1VWiSK3zp 0 -3727877 JAHAON010000009.1 JAHAON010000009.1 0 56661561 SO:0000738 7ae11c0c030017c8c2d35d67c9ac9316 6 \N Crj1lvdkJ4Tlm0Q8CEgkGZ6vmxadaopS 0 -3727878 JAHAON010000010.1 JAHAON010000010.1 0 139507333 SO:0000738 
255d7b0f6a9f1f7d5f171b50eada5d6b 6 \N leJMsG-aQiRxi_QrgNomPJ3Wjpins2Ej 0 -3742614 JAGYVY010000001.1 JAGYVY010000001.1 0 51866122 SO:0000738 c67e160f076badff0d3c09289f711944 4 \N lGYmQZArBanljWYhufm3YzWp46jnEE39 0 -3742615 JAGYVY010000002.1 JAGYVY010000002.1 0 8986677 SO:0000738 c18c0a8433faef15c8947b862607f41e 4 \N 93cXvE8ygIE1LwyeejKgJ2jUBgi0fe8Y 0 -3742616 JAGYVY010000003.1 JAGYVY010000003.1 0 47249189 SO:0000738 59d40cdafc3b0d91fe836a49ffe7f591 4 \N recqxURiYRbP6f9yq6ck8pWH3o6dPXuh 0 -3742617 JAGYVY010000004.1 JAGYVY010000004.1 0 55363342 SO:0000738 3f9b3c2935d8657fadf86a9c8b6c44e3 4 \N wsdlsVY07wFILtVM4y1mQToK3WsC0x25 0 -3742618 JAGYVY010000005.1 JAGYVY010000005.1 0 12137054 SO:0000738 56b31b794c9d1115a51ae703cae480b1 4 \N QidyPw5USOpHj12iV3zUJp2mOERzzcMQ 0 -3742619 JAGYVY010000006.1 JAGYVY010000006.1 0 54505167 SO:0000738 7fb60047ffae535b15057cb346e543f2 4 \N gfqraPB64YaWSK9LuPzNybVqzY_kJzBv 0 -3742620 JAGYVY010000007.1 JAGYVY010000007.1 0 24869350 SO:0000738 2990dbdb7dd770e092a6baf2a4d57f04 4 \N EY9mIHACnljr9Akv3C9VuXbHTSDuY-rr 0 -3742621 JAGYVY010000008.1 JAGYVY010000008.1 0 42967410 SO:0000738 7fc5b200bbaae4ce510058280e83a2e9 4 \N 8VytWiKv7yC_sKVPrpIcMMFN_hCYU9dU 0 -3742622 JAGYVY010000009.1 JAGYVY010000009.1 0 18572896 SO:0000738 4087350722474ec6169ec1da9fca6e73 4 \N O2GMCPifUdXp2QpLqixsqWLWdJSLPRdv 0 -3742623 JAGYVY010000010.1 JAGYVY010000010.1 0 6843817 SO:0000738 f54904e00811656ff76eed21370fdfcc 4 \N IyX4rd_pWEXWQ0J8jsVFwKdedzCi9_oM 0 -3752279 JAHEPF010000001.1 JAHEPF010000001.1 0 34747916 SO:0000738 9cd36c56739382f5ccd8bf05d7b7a782 7 \N _g7GaDOEVsjK_hf11hZ4ky3pVZTIHINa 0 -3752280 JAHEPF010000002.1 JAHEPF010000002.1 0 35554520 SO:0000738 a3e576310e6fc76eb80a394291fb3204 7 \N dc-s25qNY-HORolo9d0iwoecf2ozWQlE 0 -3752281 JAHEPF010000003.1 JAHEPF010000003.1 0 33855561 SO:0000738 3d070bacf47cee60c022d565c170b6d4 7 \N YnmaKNuXOkW8WvsAMno_7XxJ3vFFCIMP 0 -3752282 JAHEPF010000004.1 JAHEPF010000004.1 0 5384975 SO:0000738 90425435effabb9ed63a2bb2b360a4b2 7 \N 
teu3AdwsFzzqP2CCTmxwxal5AiQxJX5D 0 -3752283 JAHEPF010000005.1 JAHEPF010000005.1 0 47328102 SO:0000738 4d3dcad460c7997151ab3caf27af487b 7 \N F9p0cz2HQjiWrzGFKJZbzAy5wknT13nd 0 -3752284 JAHEPF010000006.1 JAHEPF010000006.1 0 26659419 SO:0000738 48d097797ada812bfb466838fbddb0d7 7 \N L3gpNYdi6RFEMs_Pzkr_ZGo-E7pfiDFu 0 -3752285 JAHEPF010000007.1 JAHEPF010000007.1 0 20832236 SO:0000738 ca81ce1541e88ae3b27a4ab3a6190510 7 \N dxDQLWXbm7mipB0rFMhevOvyRuaTqnws 0 -3752286 JAHEPF010000008.1 JAHEPF010000008.1 0 27516148 SO:0000738 aff3c5deddf48410710cee142d10ba7e 7 \N mjrFMbEfGqydPt4vCe_azkbYwGYYpFnA 0 -3752287 JAHEPF010000009.1 JAHEPF010000009.1 0 12081732 SO:0000738 5b38d7b54682b639c168d7b661f9c876 7 \N qTEIGxHpPXRcqPNOhUBwVpSuWSVLCZyU 0 -3752288 JAHEPF010000010.1 JAHEPF010000010.1 0 4432623 SO:0000738 682b6eafb4b94dfad5d124873ac50812 7 \N nA8EZeMykBVcjHvhcIlhhiWM7ylPm-_g 0 -3760113 JAHKSD010000001.1 JAHKSD010000001.1 0 110635364 SO:0000738 3dc28bf6013947644e3aa841763c7631 9 \N igWakb948tcC73JOgGzs-SDwWLKKuleI 0 -3760114 JAHKSD010000002.1 JAHKSD010000002.1 0 1186550 SO:0000738 bc667c2ec5c2dc662a767e540fafa0c1 9 \N NFg11cJVWZmoQeeJR-oNyB5QT8Cg6_w- 0 -3760115 JAHKSD010000003.1 JAHKSD010000003.1 0 32898 SO:0000738 1d4ad8c5a00a00dbb6ad0b968dbe365f 9 \N GNtztMSKoX5-PG1zYvEE0qyowc8akI3J 0 -3760116 JAHKSD010000004.1 JAHKSD010000004.1 0 111658246 SO:0000738 88bb1aa0877ac906791c96551f542cef 9 \N Tu05HwWwxYR9xPqLU7QUnGrAOCKlMUmX 0 -3760117 JAHKSD010000005.1 JAHKSD010000005.1 0 139957525 SO:0000738 de84bdeaebb942f9f0ebc57fbe60680d 9 \N jphEshZT4l8fr4HMvXAwu6EsqM3Ud8YQ 0 -3760118 JAHKSD010000006.1 JAHKSD010000006.1 0 104451682 SO:0000738 d6a6387b078f4170e723032b48d7f8b6 9 \N SF8WSrHIwx3iITPRWUFqDHkZk6p35Rlu 0 -3760119 JAHKSD010000007.1 JAHKSD010000007.1 0 93427 SO:0000738 3eb7b09435ea2d5e3421cdb77f24fcb1 9 \N OvwIE2BbB6aKm0uNNq5cXZ9lSQpoqVIX 0 -3760120 JAHKSD010000008.1 JAHKSD010000008.1 0 50570566 SO:0000738 d4a40b2b51cd0291b7ec047ca614a953 9 \N zVEcf4soxkzJkLVNcib3nnGaPOxi4cBb 0 -3760121 
JAHKSD010000009.1 JAHKSD010000009.1 0 1212238 SO:0000738 6b31e1467a52b4747751e3d155bde949 9 \N 6SQJJlA7VRnm-L_Pf2F-a6TqUnO1IbsW 0 -3760122 JAHKSD010000010.1 JAHKSD010000010.1 0 100646410 SO:0000738 9391399f48bde664b20f9b8dca808704 9 \N 07Ugr7jsN9jhBD3JbtYMNh79DDxOPjio 0 -3785686 1A 1A 1 594102056 SO:0000738 1e85cfd7774c4118a84f1dd62783b31d 79 1 d1TidPwqmfZ775SEnWe1DyCPcKNpYpFO 0 -3785687 1B 1B 1 689851870 SO:0000738 b917173c52104915e78845d137d922d0 79 2 8WfzIibnnlG1L1iNPZ3Sk0uiwIMK4znm 0 -3785688 1D 1D 1 495453186 SO:0000738 cef89d6e535210757cb10e504cbf9b03 79 3 y3u4DW3vBcXYTjtMBVhsyN7Ly7Rc2dFk 0 -3785689 2A 2A 1 780798557 SO:0000738 080bb4a5ff38e4849bf446fbbe40000a 79 4 2PQ-iGfRjPsojv1K9g18dQfDzNO2lyXq 0 -3785690 2B 2B 1 801256715 SO:0000738 8a52f592bb8a4f44438f7791dcca142a 79 5 keeRxrxBxos9oB3Adk47VryL12KtzINt 0 -3785691 2D 2D 1 651852609 SO:0000738 3fc8c6b5ea64445d7fba64ac55719895 79 6 Gt6hPn3IJboGQ-mwMXzSITaPuYAkfYiD 0 -3785692 3A 3A 1 750843639 SO:0000738 606b5e6749208700ccd9ec246449a1ac 79 7 Rm2Xzny0tMfgjPqmTa7EDn1BYJfcgk66 0 -3785693 3B 3B 1 830829764 SO:0000738 7bae7b0ef4dabf3d7456de792263713c 79 8 Xrjc9MtZuG34jFBE4xY6VuhGKa6G41ya 0 -3785694 3D 3D 1 615552423 SO:0000738 e7feee9ffc854a18889517e36b1fc257 79 9 s-CnQy24wXYDP0EsRUji7tvIkdfnF2qN 0 -3785695 4A 4A 1 744588157 SO:0000738 0f0ac12903101a6d0c6b417066f4fc5d 79 10 Qz1gdFRd4l6QXrOlcreln873gbns69Q0 0 +1871 1 1 1 249250621 SO:0000738 1b22b98cdeb4a9304cb5d48026a85128 40 1 S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU 0 primary_assembly 0 \N +1872 10 10 1 135534747 SO:0000738 988c28e000e84c26d552359af1ea2e1d 40 10 -BOZ8Esn8J88qDwNiSEwUr5425UXdiGX 0 primary_assembly 0 \N +1873 11 11 1 135006516 SO:0000738 98c59049a2df285c76ffb1c6db8f8b96 40 11 XXi2_O1ly-CCOi3HP5TypAw7LtC6niFG 0 primary_assembly 0 \N +1874 12 12 1 133851895 SO:0000738 51851ac0e1a115847ad36449b0015864 40 12 105bBysLoDFQHhajooTAUyUkNiZ8LJEH 0 primary_assembly 0 \N +1875 13 13 1 115169878 SO:0000738 283f8d7892baa81b510a015719ca7b0b 40 13 
Ewb9qlgTqN6e_XQiRVYpoUfZJHXeiUfH 0 primary_assembly 0 \N +1876 14 14 1 107349540 SO:0000738 98f3cae32b2a2e9524bc19813927542e 40 14 5Ji6FGEKfejK1U6BMScqrdKJK8GqmIGf 0 primary_assembly 0 \N +1877 15 15 1 102531392 SO:0000738 e5645a794a8238215b2cd77acb95a078 40 15 zIMZb3Ft7RdWa5XYq0PxIlezLY2ccCgt 0 primary_assembly 0 \N +1878 16 16 1 90354753 SO:0000738 fc9b1a7b42b97a864f56b348b06095e6 40 16 W6wLoIFOn4G7cjopxPxYNk2lcEqhLQFb 0 primary_assembly 0 \N +1879 17 17 1 81195210 SO:0000738 351f64d4f4f9ddd45b35336ad97aa6de 40 17 AjWXsI7AkTK35XW9pgd3UbjpC3MAevlz 0 primary_assembly 0 \N +1880 18 18 1 78077248 SO:0000738 b15d4b2d29dde9d3e4f93d1d0f2cbc9c 40 18 BTj4BDaaHYoPhD3oY2GdwC_l0uqZ92UD 0 primary_assembly 0 \N +1893 GL000191.1 GL000191.1 0 106433 SO:0000738 d75b436f50a8214ee9c2a51d30b2c2cc 40 \N aX54PSRCZbj0EVn5QAH4zoO72gsmSTO8 0 primary_assembly 0 \N +1894 GL000192.1 GL000192.1 0 547496 SO:0000738 325ba9e808f669dfeee210fdd7b470ac 40 \N udMJQEKIyWPe8YGW1Dws6IHk_1NbkY9Y 0 primary_assembly 0 \N +1895 GL000193.1 GL000193.1 0 189789 SO:0000738 dbb6e8ece0b5de29da56601613007c2a 40 \N be3_RQlT0dXc4jYLkbEiRC6HSl7u1FjF 0 primary_assembly 0 \N +1896 GL000194.1 GL000194.1 0 191469 SO:0000738 6ac8f815bf8e845bb3031b73f812c012 40 \N WyYCLC4VxJvbBz2b_wBWF5BdQotiUVdB 0 primary_assembly 0 \N +1897 GL000195.1 GL000195.1 0 182896 SO:0000738 5d9ec007868d517e73543b005ba48535 40 \N 2LEWMcieZGf9Sx4VpEeWSDcULUVHGm0w 0 primary_assembly 0 \N +1955 1 1 1 248956422 SO:0000738 2648ae1bacce4ec4b6cf337dcae37816 92 1 2YnepKM7OkBoOrKmvHbGqguVfF9amCST 0 primary_assembly 0 \N +1956 10 10 1 133797422 SO:0000738 907112d17fcb73bcab1ed1c72b97ce68 92 10 P6q4sxSkFfKZpUgEwW73rx2a2ZYY-_pH 0 primary_assembly 0 \N +1957 11 11 1 135086622 SO:0000738 1511375dc2dd1b633af8cf439ae90cec 92 11 2NkFm8HK88MqeNkCgj78KidCAXgnsfV1 0 primary_assembly 0 \N +1958 12 12 1 133275309 SO:0000738 e81e16d3f44337034695a29b97708fce 92 12 7dzBrNZj_CM_Dg7zLl--e18KI8wVUxEd 0 primary_assembly 0 \N +1959 13 13 1 114364328 SO:0000738 
17dab79b963ccd8e7377cef59a54fe1c 92 13 0qw_sn8Cl7OmMTFlukjFD2DUejW0T80Y 0 primary_assembly 0 \N +1960 14 14 1 107043718 SO:0000738 acbd9552c059d9b403e75ed26c1ce5bc 92 14 eK4D2MosgK_ivBkgi6FVPg5UXs1bYESm 0 primary_assembly 0 \N +1961 15 15 1 101991189 SO:0000738 f036bd11158407596ca6bf3581454706 92 15 AsXvWL1-2i5U_buw6_niVIxD6zTbAuS6 0 primary_assembly 0 \N +1962 16 16 1 90338345 SO:0000738 24e7cabfba3548a2bb4dff582b9ee870 92 16 EjrUp_S2oCd2b_SdqeZeOYUkEC966iVh 0 primary_assembly 0 \N +1963 17 17 1 83257441 SO:0000738 a8499ca51d6fb77332c2d242923994eb 92 17 upqChCoU-Gtd_61IidCsln-r8cxUTFeP 0 primary_assembly 0 \N +1964 18 18 1 80373285 SO:0000738 11eeaa801f6b0e2e36a1138616b8ee9a 92 18 vWwFhJ5lQDMhh-czg06YtlWqu0lvFAZV 0 primary_assembly 0 \N +53919 1 1 1 640851 SO:0000738 46d861cab72441c63589339b36e644ac 15 1 JfN51lUFS8sH2f2-OQ58B1f-ZFmxtjEO 0 primary_assembly 0 \N +53920 10 10 1 1687656 SO:0000738 7f3f93983d66669bf5e18266f2565aa4 15 10 nv-h1XrxCbnvlyAlLn612PX8kJBmpf9m 0 primary_assembly 0 \N +53921 11 11 1 2038340 SO:0000738 3733d85e3f9fd8c5284e70dc977950b7 15 11 GEeKuO07bJBfzpd4KKpQPasLKNWpBia6 0 primary_assembly 0 \N +53922 12 12 1 2271494 SO:0000738 81d7ade8026e3099dc4a595a27ce5fe4 15 12 qoUuu7OZJT_jdUqwbk_gk-yvozwqFNdQ 0 primary_assembly 0 \N +53923 13 13 1 2925236 SO:0000738 2e5f27c4aa13202464e20c520a90bddc 15 13 HuuhxwpHdyAZqDG3ex62hIuMZ-FHd5Io 0 primary_assembly 0 \N +53924 14 14 1 3291936 SO:0000738 2bfeb85b8b4486aa4b03c3cb4dc56dad 15 14 mJezSE0gh0LS5XVFlfR6PZIRw3-eLyAR 0 primary_assembly 0 \N +53925 2 2 1 947102 SO:0000738 3264ffcaf0fb7e7c6adf14088c866886 15 2 tpNy-HkCz6Cq62euyOlWNduv91huwg7p 0 primary_assembly 0 \N +53926 3 3 1 1067971 SO:0000738 1a2a92569065a83856362480f86aaf17 15 3 YqknWYibUjSC4bFoJwSTAcXKBC4uld3x 0 primary_assembly 0 \N +53927 4 4 1 1200490 SO:0000738 3bb29def1493995037b9426acc5cdc20 15 4 jETGmEu2CRUEy_e3SK9zDfszR9-a9UmS 0 primary_assembly 0 \N +53928 5 5 1 1343557 SO:0000738 bf203f82beeabc7337d1dcb50bca14cf 15 5 
6UHvnqsyEKDTHAzF6FZ0OHgDVVeQ6AZv 0 primary_assembly 0 \N +53933 I I 1 15072434 SO:0000738 185711aa389cf8d9302ad2ab07bd31e0 219 1 jXmB22vaK688X3rr2HPNzTNxnkQuCTgF 0 primary_assembly 0 \N +53934 II II 1 15279421 SO:0000738 9e7e67d1e51cdb31791deab89dc31550 219 2 7k5F0INpuaUarMm8nyI6k0nSw46G65xC 0 primary_assembly 0 \N +53935 III III 1 13783801 SO:0000738 c0f1a58d2bf6ff6a16617839bbc5fe52 219 3 -CzkjDYLb3a8V06zz7_pT3JxsxxGsQO0 0 primary_assembly 0 \N +53936 IV IV 1 17493829 SO:0000738 2156ab555e19afd8a0ca5aba82fb2a2a 219 4 0Tm-TKE2JHxrM9avtCG0-9cTEnqpfw2b 0 primary_assembly 0 \N +53937 MtDNA MtDNA 1 13794 SO:0000737 199e147d502d88e45047413dc83c039c 219 7 L1L6FO8ESIZJBNTOTPK7GnZvJYiewKK0 0 primary_assembly 0 \N +53938 V V 1 20924180 SO:0000738 ffaf018f42f95375d2af6dcd402fef69 219 5 3ID7cGPgmNoJWDTn6-At5Geri2oVP6Rv 0 primary_assembly 0 \N +53939 X X 1 17718942 SO:0000738 dae3e9ec047e8147337b550dd8564d0b 219 6 mmZ9kKbTbAEyf09gNqHPQW4fZj8YiuTx 0 primary_assembly 0 \N +60019 Chromosome Chromosome 1 4641652 SO:0000738 482a2b04485ec8c4b5f4eaba2c2002da 1 7 NjjHtoQ2gYdy2RVkAZBKURBiV7xp-8ZS 0 primary_assembly 0 \N +60020 I I 1 230218 SO:0000738 6681ac2f62509cfc220d78751b8dc524 216 1 lZyxiD_ByprhOUzrR1o1bq0ezO_1gkrn 0 primary_assembly 0 \N +60021 II II 1 813184 SO:0000738 97a317c689cbdd7e92a5c159acd290d2 216 2 vw8jTiV5SAPDH4TEIZhNGylzNsQM4NC9 0 primary_assembly 0 \N +60022 III III 1 316620 SO:0000738 54f4a74aa6392d9e19b82c38aa8ab345 216 3 A_i2Id0FjBI-tQyU4ZaCEdxRzQheDevn 0 primary_assembly 0 \N +60023 IV IV 1 1531933 SO:0000738 74180788027e20df3de53dcb2367d9e3 216 4 QXSUMoZW_SSsCCN9_wc-xmubKQSOn3Qb 0 primary_assembly 0 \N +60024 IX IX 1 439888 SO:0000738 4eae53ae7b2029b7e1075461c3eb9aac 216 9 hb1scjdCWL89PtAkR0AVH9-dNH5R0FsN 0 primary_assembly 0 \N +60025 Mito Mito 1 85779 SO:0000737 71c39cf065b8d574f636b654c274cf1b 216 17 DrOlaWEY9iBBQrAAvbrzXsQlzZRV2J59 0 primary_assembly 0 \N +60026 V V 1 576874 SO:0000738 d2787193198c8d260f58f2097f9e1e39 216 5 UN_b-wij0EtsgFqQ2xNsbXs_GYQQIbeQ 
0 primary_assembly 0 \N +60027 VI VI 1 270161 SO:0000738 b7ebc601f9a7df2e1ec5863deeae88a3 216 6 z-qJgWoacRBV77zcMgZN9E_utrdzmQsH 0 primary_assembly 0 \N +60028 VII VII 1 1090940 SO:0000738 a308c7ebf0b67c4926bc190dc4ba8ed8 216 7 9wkqGXgK6bvM0gcjBiTDk9tAaqOZojlR 0 primary_assembly 0 \N +60029 VIII VIII 1 562643 SO:0000738 f66a4f8eef89fc3c3a393fe0210169f1 216 8 K8ln7Ygob_lcVjNh-C8kUydzZjRt3UDf 0 primary_assembly 0 \N +3725167 JAGYYT010000001.1 JAGYYT010000001.1 0 46592869 SO:0000738 3ba11daa61cfe7d6244960d4bcc55113 5 \N lkW5j6Yeu6GL6xusZQCvq7KqO-3KGeh3 0 primary_assembly 0 \N +3725168 JAGYYT010000002.1 JAGYYT010000002.1 0 55482364 SO:0000738 bac357b106d364f8dabc169545765f92 5 \N iajXIfPoEJDR8BdUaRcI6LhzyZmgtXoA 0 primary_assembly 0 \N +3725169 JAGYYT010000003.1 JAGYYT010000003.1 0 24607739 SO:0000738 4ff9f520e63b14b64aede25a070baea9 5 \N CMg2W54uVjBnLWwm3xjUnBvyEXCvxyQh 0 primary_assembly 0 \N +3725170 JAGYYT010000004.1 JAGYYT010000004.1 0 10690193 SO:0000738 361cc6768f00f6bbb45ff12756c76cd1 5 \N I7OKvWCdpSHkxh_LZv3bdpb5sLcX_3IB 0 primary_assembly 0 \N +3725171 JAGYYT010000005.1 JAGYYT010000005.1 0 28045165 SO:0000738 725d218bfe0ce15239206f78f5604781 5 \N EdEqUHN3H05E-RlSmWi9SySm8JB90XzN 0 primary_assembly 0 \N +3725172 JAGYYT010000006.1 JAGYYT010000006.1 0 158663023 SO:0000738 514c27535b3840a2a2ee405f08cb3446 5 \N vZtOo2P9FiBega_X8LSl_0xj4dmJZmMs 0 primary_assembly 0 \N +3725173 JAGYYT010000007.1 JAGYYT010000007.1 0 46627313 SO:0000738 55301b7468e9cbc4d73a9253d752b652 5 \N HdIELGLdm6TPwPbrHfRlu_i2Nbs5w8Dp 0 primary_assembly 0 \N +3725174 JAGYYT010000008.1 JAGYYT010000008.1 0 1975142 SO:0000738 a54f58b59c6061d0c44273897a7c148e 5 \N VLql4yEZYIlHmzeHQLmlVrwou_7rBDxu 0 primary_assembly 0 \N +3725175 JAGYYT010000009.1 JAGYYT010000009.1 0 118296892 SO:0000738 ac1cbd267c1a1ce0eef59afd7fda6047 5 \N fTc-MuSRIDKYqLW4zFh7D9SR3UeAQ7tC 0 primary_assembly 0 \N +3725176 JAGYYT010000010.1 JAGYYT010000010.1 0 8989820 SO:0000738 bbab8e1c1a52042b195975018ff7271e 5 \N 
NRaeEfjaqeSr3XXxJss_Az8kfYEiJGUA 0 primary_assembly 0 \N +3727869 JAHAON010000001.1 JAHAON010000001.1 0 108267787 SO:0000738 2bfee5eba76ddf72a9ce1fe104dfb73a 6 \N nK5v5CSf3RTvSj3ynps2rwV9Qlwtq2XP 0 primary_assembly 0 \N +3727870 JAHAON010000002.1 JAHAON010000002.1 0 102298096 SO:0000738 136adb262594356fde2be5bf32d091ea 6 \N wn703GYvypvgDg3Nwmg-MI6xqflTex53 0 primary_assembly 0 \N +3727871 JAHAON010000003.1 JAHAON010000003.1 0 28141884 SO:0000738 d2044e19e173fc0af1271a71a20c19e7 6 \N eoWhZT0jMNBS9d50Sg02HkWe2TJA-Rga 0 primary_assembly 0 \N +3727872 JAHAON010000004.1 JAHAON010000004.1 0 40727531 SO:0000738 c7a23b01e734c6b22fdd078e97c6f1da 6 \N 2klgq3Y-GrPMLPHsGfkeE7lMuAjbnaxe 0 primary_assembly 0 \N +3727873 JAHAON010000005.1 JAHAON010000005.1 0 111718856 SO:0000738 c3acd9fa5d4a02da10007a9b71d49f0d 6 \N 4h8XGzCJ6_JvovqGcLZ4HGz-WMOcnfwY 0 primary_assembly 0 \N +3727874 JAHAON010000006.1 JAHAON010000006.1 0 89895720 SO:0000738 c24fc746780a67eba106ec07563849f9 6 \N jSJ3PqRBMXsYUlSdMZUOagnljy_QRUFJ 0 primary_assembly 0 \N +3727875 JAHAON010000007.1 JAHAON010000007.1 0 39819246 SO:0000738 08a128df8dd3c67aa90fe81490cf9a80 6 \N Ao235-Ye0mxGwEwvzEo94ejs8Gk2l72e 0 primary_assembly 0 \N +3727876 JAHAON010000008.1 JAHAON010000008.1 0 32367248 SO:0000738 b7fbc0bd6188d59f429e25ee07ab0e8d 6 \N mSJYKw654SvQGfz9HKxNvDP1VWiSK3zp 0 primary_assembly 0 \N +3727877 JAHAON010000009.1 JAHAON010000009.1 0 56661561 SO:0000738 7ae11c0c030017c8c2d35d67c9ac9316 6 \N Crj1lvdkJ4Tlm0Q8CEgkGZ6vmxadaopS 0 primary_assembly 0 \N +3727878 JAHAON010000010.1 JAHAON010000010.1 0 139507333 SO:0000738 255d7b0f6a9f1f7d5f171b50eada5d6b 6 \N leJMsG-aQiRxi_QrgNomPJ3Wjpins2Ej 0 primary_assembly 0 \N +3742614 JAGYVY010000001.1 JAGYVY010000001.1 0 51866122 SO:0000738 c67e160f076badff0d3c09289f711944 4 \N lGYmQZArBanljWYhufm3YzWp46jnEE39 0 primary_assembly 0 \N +3742615 JAGYVY010000002.1 JAGYVY010000002.1 0 8986677 SO:0000738 c18c0a8433faef15c8947b862607f41e 4 \N 93cXvE8ygIE1LwyeejKgJ2jUBgi0fe8Y 0 primary_assembly 
0 \N +3742616 JAGYVY010000003.1 JAGYVY010000003.1 0 47249189 SO:0000738 59d40cdafc3b0d91fe836a49ffe7f591 4 \N recqxURiYRbP6f9yq6ck8pWH3o6dPXuh 0 primary_assembly 0 \N +3742617 JAGYVY010000004.1 JAGYVY010000004.1 0 55363342 SO:0000738 3f9b3c2935d8657fadf86a9c8b6c44e3 4 \N wsdlsVY07wFILtVM4y1mQToK3WsC0x25 0 primary_assembly 0 \N +3742618 JAGYVY010000005.1 JAGYVY010000005.1 0 12137054 SO:0000738 56b31b794c9d1115a51ae703cae480b1 4 \N QidyPw5USOpHj12iV3zUJp2mOERzzcMQ 0 primary_assembly 0 \N +3742619 JAGYVY010000006.1 JAGYVY010000006.1 0 54505167 SO:0000738 7fb60047ffae535b15057cb346e543f2 4 \N gfqraPB64YaWSK9LuPzNybVqzY_kJzBv 0 primary_assembly 0 \N +3742620 JAGYVY010000007.1 JAGYVY010000007.1 0 24869350 SO:0000738 2990dbdb7dd770e092a6baf2a4d57f04 4 \N EY9mIHACnljr9Akv3C9VuXbHTSDuY-rr 0 primary_assembly 0 \N +3742621 JAGYVY010000008.1 JAGYVY010000008.1 0 42967410 SO:0000738 7fc5b200bbaae4ce510058280e83a2e9 4 \N 8VytWiKv7yC_sKVPrpIcMMFN_hCYU9dU 0 primary_assembly 0 \N +3742622 JAGYVY010000009.1 JAGYVY010000009.1 0 18572896 SO:0000738 4087350722474ec6169ec1da9fca6e73 4 \N O2GMCPifUdXp2QpLqixsqWLWdJSLPRdv 0 primary_assembly 0 \N +3742623 JAGYVY010000010.1 JAGYVY010000010.1 0 6843817 SO:0000738 f54904e00811656ff76eed21370fdfcc 4 \N IyX4rd_pWEXWQ0J8jsVFwKdedzCi9_oM 0 primary_assembly 0 \N +3752279 JAHEPF010000001.1 JAHEPF010000001.1 0 34747916 SO:0000738 9cd36c56739382f5ccd8bf05d7b7a782 7 \N _g7GaDOEVsjK_hf11hZ4ky3pVZTIHINa 0 primary_assembly 0 \N +3752280 JAHEPF010000002.1 JAHEPF010000002.1 0 35554520 SO:0000738 a3e576310e6fc76eb80a394291fb3204 7 \N dc-s25qNY-HORolo9d0iwoecf2ozWQlE 0 primary_assembly 0 \N +3752281 JAHEPF010000003.1 JAHEPF010000003.1 0 33855561 SO:0000738 3d070bacf47cee60c022d565c170b6d4 7 \N YnmaKNuXOkW8WvsAMno_7XxJ3vFFCIMP 0 primary_assembly 0 \N +3752282 JAHEPF010000004.1 JAHEPF010000004.1 0 5384975 SO:0000738 90425435effabb9ed63a2bb2b360a4b2 7 \N teu3AdwsFzzqP2CCTmxwxal5AiQxJX5D 0 primary_assembly 0 \N +3752283 JAHEPF010000005.1 JAHEPF010000005.1 0 
47328102 SO:0000738 4d3dcad460c7997151ab3caf27af487b 7 \N F9p0cz2HQjiWrzGFKJZbzAy5wknT13nd 0 primary_assembly 0 \N +3752284 JAHEPF010000006.1 JAHEPF010000006.1 0 26659419 SO:0000738 48d097797ada812bfb466838fbddb0d7 7 \N L3gpNYdi6RFEMs_Pzkr_ZGo-E7pfiDFu 0 primary_assembly 0 \N +3752285 JAHEPF010000007.1 JAHEPF010000007.1 0 20832236 SO:0000738 ca81ce1541e88ae3b27a4ab3a6190510 7 \N dxDQLWXbm7mipB0rFMhevOvyRuaTqnws 0 primary_assembly 0 \N +3752286 JAHEPF010000008.1 JAHEPF010000008.1 0 27516148 SO:0000738 aff3c5deddf48410710cee142d10ba7e 7 \N mjrFMbEfGqydPt4vCe_azkbYwGYYpFnA 0 primary_assembly 0 \N +3752287 JAHEPF010000009.1 JAHEPF010000009.1 0 12081732 SO:0000738 5b38d7b54682b639c168d7b661f9c876 7 \N qTEIGxHpPXRcqPNOhUBwVpSuWSVLCZyU 0 primary_assembly 0 \N +3752288 JAHEPF010000010.1 JAHEPF010000010.1 0 4432623 SO:0000738 682b6eafb4b94dfad5d124873ac50812 7 \N nA8EZeMykBVcjHvhcIlhhiWM7ylPm-_g 0 primary_assembly 0 \N +3760113 JAHKSD010000001.1 JAHKSD010000001.1 0 110635364 SO:0000738 3dc28bf6013947644e3aa841763c7631 9 \N igWakb948tcC73JOgGzs-SDwWLKKuleI 0 primary_assembly 0 \N +3760114 JAHKSD010000002.1 JAHKSD010000002.1 0 1186550 SO:0000738 bc667c2ec5c2dc662a767e540fafa0c1 9 \N NFg11cJVWZmoQeeJR-oNyB5QT8Cg6_w- 0 primary_assembly 0 \N +3760115 JAHKSD010000003.1 JAHKSD010000003.1 0 32898 SO:0000738 1d4ad8c5a00a00dbb6ad0b968dbe365f 9 \N GNtztMSKoX5-PG1zYvEE0qyowc8akI3J 0 primary_assembly 0 \N +3760116 JAHKSD010000004.1 JAHKSD010000004.1 0 111658246 SO:0000738 88bb1aa0877ac906791c96551f542cef 9 \N Tu05HwWwxYR9xPqLU7QUnGrAOCKlMUmX 0 primary_assembly 0 \N +3760117 JAHKSD010000005.1 JAHKSD010000005.1 0 139957525 SO:0000738 de84bdeaebb942f9f0ebc57fbe60680d 9 \N jphEshZT4l8fr4HMvXAwu6EsqM3Ud8YQ 0 primary_assembly 0 \N +3760118 JAHKSD010000006.1 JAHKSD010000006.1 0 104451682 SO:0000738 d6a6387b078f4170e723032b48d7f8b6 9 \N SF8WSrHIwx3iITPRWUFqDHkZk6p35Rlu 0 primary_assembly 0 \N +3760119 JAHKSD010000007.1 JAHKSD010000007.1 0 93427 SO:0000738 3eb7b09435ea2d5e3421cdb77f24fcb1 9 \N 
OvwIE2BbB6aKm0uNNq5cXZ9lSQpoqVIX 0 primary_assembly 0 \N +3760120 JAHKSD010000008.1 JAHKSD010000008.1 0 50570566 SO:0000738 d4a40b2b51cd0291b7ec047ca614a953 9 \N zVEcf4soxkzJkLVNcib3nnGaPOxi4cBb 0 primary_assembly 0 \N +3760121 JAHKSD010000009.1 JAHKSD010000009.1 0 1212238 SO:0000738 6b31e1467a52b4747751e3d155bde949 9 \N 6SQJJlA7VRnm-L_Pf2F-a6TqUnO1IbsW 0 primary_assembly 0 \N +3760122 JAHKSD010000010.1 JAHKSD010000010.1 0 100646410 SO:0000738 9391399f48bde664b20f9b8dca808704 9 \N 07Ugr7jsN9jhBD3JbtYMNh79DDxOPjio 0 primary_assembly 0 \N +3785686 1A 1A 1 594102056 SO:0000738 1e85cfd7774c4118a84f1dd62783b31d 79 1 d1TidPwqmfZ775SEnWe1DyCPcKNpYpFO 0 primary_assembly 0 \N +3785687 1B 1B 1 689851870 SO:0000738 b917173c52104915e78845d137d922d0 79 2 8WfzIibnnlG1L1iNPZ3Sk0uiwIMK4znm 0 primary_assembly 0 \N +3785688 1D 1D 1 495453186 SO:0000738 cef89d6e535210757cb10e504cbf9b03 79 3 y3u4DW3vBcXYTjtMBVhsyN7Ly7Rc2dFk 0 primary_assembly 0 \N +3785689 2A 2A 1 780798557 SO:0000738 080bb4a5ff38e4849bf446fbbe40000a 79 4 2PQ-iGfRjPsojv1K9g18dQfDzNO2lyXq 0 primary_assembly 0 \N +3785690 2B 2B 1 801256715 SO:0000738 8a52f592bb8a4f44438f7791dcca142a 79 5 keeRxrxBxos9oB3Adk47VryL12KtzINt 0 primary_assembly 0 \N +3785691 2D 2D 1 651852609 SO:0000738 3fc8c6b5ea64445d7fba64ac55719895 79 6 Gt6hPn3IJboGQ-mwMXzSITaPuYAkfYiD 0 primary_assembly 0 \N +3785692 3A 3A 1 750843639 SO:0000738 606b5e6749208700ccd9ec246449a1ac 79 7 Rm2Xzny0tMfgjPqmTa7EDn1BYJfcgk66 0 primary_assembly 0 \N +3785693 3B 3B 1 830829764 SO:0000738 7bae7b0ef4dabf3d7456de792263713c 79 8 Xrjc9MtZuG34jFBE4xY6VuhGKa6G41ya 0 primary_assembly 0 \N +3785694 3D 3D 1 615552423 SO:0000738 e7feee9ffc854a18889517e36b1fc257 79 9 s-CnQy24wXYDP0EsRUji7tvIkdfnF2qN 0 primary_assembly 0 \N +3785695 4A 4A 1 744588157 SO:0000738 0f0ac12903101a6d0c6b417066f4fc5d 79 10 Qz1gdFRd4l6QXrOlcreln873gbns69Q0 0 primary_assembly 0 \N diff --git a/src/tests/databases/ensembl_genome_metadata/attribute.txt 
b/src/tests/databases/ensembl_genome_metadata/attribute.txt index b65c6010..afd4abff 100644 --- a/src/tests/databases/ensembl_genome_metadata/attribute.txt +++ b/src/tests/databases/ensembl_genome_metadata/attribute.txt @@ -1,104 +1,104 @@ -1 assembly.accession assembly.accession assembly.accession string 1, -2 assembly.stats.chromosomes Chromosomes or plasmids Number of structures in cells containing DNA integer 0, -3 assembly.stats.component_sequences Component sequences Part of the primary sequences in assembly integer 0, -4 assembly.stats.contig_n50 Contig N50 Median size of contigs in a genome assembly bp 0, -5 assembly.date assembly.date assembly.date string 0, -6 assembly.default assembly.default assembly.default string 0, -7 assembly.stats.gc_percentage Average GC content Percentage of nucleotides in DNA that are G or C percent 0, -8 assembly.is_reference assembly.is_reference assembly.is_reference string 0, -9 assembly.level assembly.level assembly.level string 0, -10 assembly.mapping assembly.mapping assembly.mapping string 0, -11 assembly.name assembly.name assembly.name string 1, -12 assembly.provider_name assembly.provider_name assembly.provider_name string 0, -13 assembly.provider_url assembly.provider_url assembly.provider_url string 0, -14 assembly.stats.spanned_gaps Spanned gaps Number of gaps covered by sequencing reads integer 0, -15 assembly.tolid assembly.tolid assembly.tolid string 0, -16 assembly.stats.toplevel_sequences Top level sequences Primary sequences in a genome assembly integer 0, -17 assembly.stats.total_coding_sequence_length Total coding sequence length Total length of all coding sequences bp 0, -18 assembly.stats.total_gap_length Total gap length Total length of all gaps in a genome assembly bp 0, -19 assembly.stats.total_genome_length Total genome length Total length of all genomic sequences bp 0, -20 assembly.ucsc_alias assembly.ucsc_alias assembly.ucsc_alias string 0, -21 genebuild.stats.average_cds_length Average CDS length 
Average length of coding sequences float 0, -22 genebuild.stats.average_coding_exons_per_coding_gene Average coding exons per coding gene Average coding exons per coding gene string 0, -23 genebuild.stats.average_coding_exons_per_transcript Average coding exons per transcript Average coding exons per coding transcript float 0, -24 genebuild.stats.average_coding_exon_length Average exon length per coding gene Average length of coding exons bp 0, -25 genebuild.stats.average_exon_length Average exon length Average length of exons bp 0, -26 genebuild.stats.average_genomic_span Average coding genomic span Average length of all genomic regions bp 0, -27 genebuild.stats.average_intron_length Average intron length Average intron length per coding gene bp 0, -28 genebuild.stats.average_sequence_legth Average coding sequence length Average length of sequences in genome bp 0, -29 genebuild.stats.coding_genes Coding genes Genes that code for proteins integer 0, -30 genebuild.stats.coding_transcripts Coding transcripts Transcripts that code for proteins integer 0, -31 genebuild.stats.coding_transcripts_per_gene Average coding transcripts per gene Average coding transcripts per gene float 0, -32 genebuild.hash genebuild.hash genebuild.hash string 0, -33 genebuild.initial_release_date genebuild.initial_release_date genebuild.initial_release_date string 0, -34 genebuild.last_geneset_update genebuild.last_geneset_update genebuild.last_geneset_update string 1, -35 genebuild.level genebuild.level genebuild.level string 0, -36 genebuild.longest_gene_length Longest coding gene Length of longest gene bp 0, -37 genebuild.method genebuild.method genebuild.method string 0, -38 genebuild.method_display genebuild.method_display genebuild.method_display string 0, -39 genebuild.stats.nc_average_exons_per_transcript Average exons per non-coding transcript Mean exon count per transcript float 0, -40 genebuild.stats.nc_average_exon_length Average exon length per non-coding transcript Mean exon 
length bp 0, -41 genebuild.stats.nc_average_genomic_span Average non-coding genomic span Mean length of all genomic regions bp 0, -42 genebuild.stats.nc_average_sequence_length Average non-coding sequence length Mean length of all sequences bp 0, -43 genebuild.stats.nc_longest_gene_length Longest non-coding gene Length of longest non-coding gene bp 0, -44 genebuild.stats.nc_long_non_coding_genes Long non-coding genes Long genes not coding for proteins integer 0, -45 genebuild.stats.nc_misc_non_coding_genes Misc. non-coding genes Miscellaneous non-coding genes integer 0, -46 genebuild.stats.nc_non_coding_genes Non-coding genes Genes that don't code for proteins integer 0, -47 genebuild.stats.nc_shortest_gene_length Shortest non-coding gene Length of shortest gene bp 0, -48 genebuild.stats.nc_small_non_coding_genes Small non-coding genes Small genes not coding for proteins integer 0, -49 genebuild.stats.nc_total_introns Introns in non-coding genes Total intron count integer 0, -50 genebuild.stats.nc_total_transcripts Non-coding transcripts Total RNA transcript count integer 0, -51 genebuild.stats.nc_transcripts_per_gene Average transcripts per non-coding gene Mean transcripts count per gene float 0, -52 genebuild.stats.ps_average_exons_per_transcript Average intron length per pseudogene Mean exon count per pseudogene transcript float 0, -53 genebuild.stats.ps_average_exon_length Average exon length per pseudogene Mean pseudogene exon length bp 0, -54 genebuild.stats.ps_average_genomic_span Average pseudogene genomic span Mean length of pseudogene regions bp 0, -55 genebuild.stats.ps_average_intron_length Average intron length per pseudogene Mean pseudogene intron length bp 0, -56 genebuild.stats.ps_average_sequence_length Average pseudogene sequence length Mean length of pseudogene sequences bp 0, -57 genebuild.stats.ps_longest_gene_length Longest pseudogene Length of longest pseudogene bp 0, -58 genebuild.stats.ps_pseudogenes Pseudogenes Genes which don't code 
functional protiens integer 0, -59 genebuild.stats.ps_shortest_gene_length Shortest pseudogene Length of shortest pseudogene bp 0, -60 genebuild.stats.ps_total_exons Exons in pseudogenes Total exon count in pseudogenes integer 0, -61 genebuild.stats.ps_total_introns Introns in pseudogenes Total intron count in pseudogenes integer 0, -62 genebuild.stats.ps_total_transcripts Transcripts in pseudogenes Total pseudogene RNA transcript count integer 0, -63 genebuild.stats.ps_transcripts_per_gene Average transcripts per pseudogene Mean pseudogene transcripts count per pseudogene float 0, -64 genebuild.stats.shortest_gene_length Shortest coding gene Length of shortest gene bp 0, -65 genebuild.start_date genebuild.start_date genebuild.start_date string 1, -66 genebuild.stats.total_coding_exons Exons in coding genes Total number of coding exons integer 0, -67 genebuild.stats.total_exons Exons in genes Total number of exons integer 0, -68 genebuild.stats.total_introns Introns in coding genes Total number of introns integer 0, -69 genebuild.stats.total_transcripts Transcripts in coding genes Total number of RNA transcripts integer 0, -70 genebuild.stats.transcripts_per_gene Average transcripts per coding gene Average number of transcripts per gene float 0, -71 genebuild.version genebuild.version genebuild.version string 1, -72 genebuild.sample_gene genebuild.sample_gene Sample Gene Data string 1, -73 genebuild.sample_location genebuild.sample_location Sample Location Data string 1, -74 assembly.stats.coverage_depth assembly.coverage_depth assembly.coverage_depth string 0, -75 assembly.web_accession_source assembly.web_accession_source assembly.web_accession_source string 0, -76 assembly.web_accession_type assembly.web_accession_type assembly.web_accession_type string 0, -77 genebuild.id genebuild.id genebuild.id string 0, -78 genebuild.stats.nc_average_intron_length Average intron length per non-coding transcript Mean intron length bp 0, -79 genebuild.projection_source_db 
genebuild.projection_source_db genebuild.projection_source_db string 0, -80 assembly.long_name assembly.long_name assembly.long_name string 0, -81 assembly.url_name assembly.url_name assembly.url_name string 0, -82 genebuild.havana_datafreeze_date genebuild.havana_datafreeze_date genebuild.havana_datafreeze_date string 0, -83 assembly.version assembly.version assembly.version string 0, -84 genebuild.provider_name genebuild.provider_name genebuild.provider_name string 1, -85 genebuild.provider_url genebuild.provider_url genebuild.provider_url string 1, -119 variation.short_variants Short variants Small-scale genetic variations integer 0, -120 variation.sample_variant variation.sample_variant variation.sample_variant string 0, -123 variation.short_variants_with_phenotype_assertions Short variation with phenotype assertion Short variation with phenotype assertion string 0, -161 compara.stats.homology_coverage compara.homology_coverage compara.homology_coverage float 0, -162 compara.homology_reference_species compara.homology_reference_species compara.homology_reference_species string 0, -163 regulation.stats.open_chromatin_count regulation.open_chromatin_count Number of open chromatin regions integer 0, -164 regulation.stats.promoter_count regulation.promoter_count Number of promoters integer 0, -165 regulation.stats.enhancer_count regulation.enhancer_count Number of enhancers integer 0, -166 regulation.stats.ctcf_count regulation.ctcf_count Number of CTCF binding sites integer 0, -167 regulation.stats.tfbs_count regulation.tfbs_count Number of regions enriched for transcription factor binding integer 0, -168 assembly.tol_id assembly.tol_id assembly.tol_id string 0, -169 genebuild.annotation_source genebuild.annotation_source genebuild.annotation_source string 1, -170 genebuild.stats.nc_total_exons Exons in non-coding genes Total exon count integer 0, -179 assembly.description assembly.description assembly.description string 0, -180 assembly.master_accession 
assembly.master_accession assembly.master_accession string 0, -181 assembly.alt_accession assembly.alt_accession assembly.alt_accession string 0, -182 dataset.build_start Dataset Build start date Dataset Build start date string 0, -183 dataset.build_end Dataset Build completed Dataset Build completed string 0, -197 genebuild.provider_version genebuild.provider_version genebuild.provider_version string 0, +1 assembly.accession assembly.accession assembly.accession string 1 +2 assembly.stats.chromosomes Chromosomes or plasmids Number of structures in cells containing DNA integer 0 +3 assembly.stats.component_sequences Component sequences Part of the primary sequences in assembly integer 0 +4 assembly.stats.contig_n50 Contig N50 Median size of contigs in a genome assembly bp 0 +5 assembly.date assembly.date assembly.date string 0 +6 assembly.default assembly.default assembly.default string 0 +7 assembly.stats.gc_percentage Average GC content Percentage of nucleotides in DNA that are G or C percent 0 +8 assembly.is_reference assembly.is_reference assembly.is_reference string 0 +9 assembly.level assembly.level assembly.level string 0 +10 assembly.mapping assembly.mapping assembly.mapping string 0 +11 assembly.name assembly.name assembly.name string 1 +12 assembly.provider_name assembly.provider_name assembly.provider_name string 0 +13 assembly.provider_url assembly.provider_url assembly.provider_url string 0 +14 assembly.stats.spanned_gaps Spanned gaps Number of gaps covered by sequencing reads integer 0 +15 assembly.tolid assembly.tolid assembly.tolid string 0 +16 assembly.stats.toplevel_sequences Top level sequences Primary sequences in a genome assembly integer 0 +17 assembly.stats.total_coding_sequence_length Total coding sequence length Total length of all coding sequences bp 0 +18 assembly.stats.total_gap_length Total gap length Total length of all gaps in a genome assembly bp 0 +19 assembly.stats.total_genome_length Total genome length Total length of all genomic 
sequences bp 0 +20 assembly.ucsc_alias assembly.ucsc_alias assembly.ucsc_alias string 0 +21 genebuild.stats.average_cds_length Average CDS length Average length of coding sequences float 0 +22 genebuild.stats.average_coding_exons_per_coding_gene Average coding exons per coding gene Average coding exons per coding gene string 0 +23 genebuild.stats.average_coding_exons_per_transcript Average coding exons per transcript Average coding exons per coding transcript float 0 +24 genebuild.stats.average_coding_exon_length Average exon length per coding gene Average length of coding exons bp 0 +25 genebuild.stats.average_exon_length Average exon length Average length of exons bp 0 +26 genebuild.stats.average_genomic_span Average coding genomic span Average length of all genomic regions bp 0 +27 genebuild.stats.average_intron_length Average intron length Average intron length per coding gene bp 0 +28 genebuild.stats.average_sequence_legth Average coding sequence length Average length of sequences in genome bp 0 +29 genebuild.stats.coding_genes Coding genes Genes that code for proteins integer 0 +30 genebuild.stats.coding_transcripts Coding transcripts Transcripts that code for proteins integer 0 +31 genebuild.stats.coding_transcripts_per_gene Average coding transcripts per gene Average coding transcripts per gene float 0 +32 genebuild.hash genebuild.hash genebuild.hash string 0 +33 genebuild.initial_release_date genebuild.initial_release_date genebuild.initial_release_date string 0 +34 genebuild.last_geneset_update genebuild.last_geneset_update genebuild.last_geneset_update string 1 +35 genebuild.level genebuild.level genebuild.level string 0 +36 genebuild.longest_gene_length Longest coding gene Length of longest gene bp 0 +37 genebuild.method genebuild.method genebuild.method string 0 +38 genebuild.method_display genebuild.method_display genebuild.method_display string 0 +39 genebuild.stats.nc_average_exons_per_transcript Average exons per non-coding transcript Mean exon 
count per transcript float 0 +40 genebuild.stats.nc_average_exon_length Average exon length per non-coding transcript Mean exon length bp 0 +41 genebuild.stats.nc_average_genomic_span Average non-coding genomic span Mean length of all genomic regions bp 0 +42 genebuild.stats.nc_average_sequence_length Average non-coding sequence length Mean length of all sequences bp 0 +43 genebuild.stats.nc_longest_gene_length Longest non-coding gene Length of longest non-coding gene bp 0 +44 genebuild.stats.nc_long_non_coding_genes Long non-coding genes Long genes not coding for proteins integer 0 +45 genebuild.stats.nc_misc_non_coding_genes Misc. non-coding genes Miscellaneous non-coding genes integer 0 +46 genebuild.stats.nc_non_coding_genes Non-coding genes Genes that don't code for proteins integer 0 +47 genebuild.stats.nc_shortest_gene_length Shortest non-coding gene Length of shortest gene bp 0 +48 genebuild.stats.nc_small_non_coding_genes Small non-coding genes Small genes not coding for proteins integer 0 +49 genebuild.stats.nc_total_introns Introns in non-coding genes Total intron count integer 0 +50 genebuild.stats.nc_total_transcripts Non-coding transcripts Total RNA transcript count integer 0 +51 genebuild.stats.nc_transcripts_per_gene Average transcripts per non-coding gene Mean transcripts count per gene float 0 +52 genebuild.stats.ps_average_exons_per_transcript Average intron length per pseudogene Mean exon count per pseudogene transcript float 0 +53 genebuild.stats.ps_average_exon_length Average exon length per pseudogene Mean pseudogene exon length bp 0 +54 genebuild.stats.ps_average_genomic_span Average pseudogene genomic span Mean length of pseudogene regions bp 0 +55 genebuild.stats.ps_average_intron_length Average intron length per pseudogene Mean pseudogene intron length bp 0 +56 genebuild.stats.ps_average_sequence_length Average pseudogene sequence length Mean length of pseudogene sequences bp 0 +57 genebuild.stats.ps_longest_gene_length Longest pseudogene 
Length of longest pseudogene bp 0 +58 genebuild.stats.ps_pseudogenes Pseudogenes Genes which don't code functional protiens integer 0 +59 genebuild.stats.ps_shortest_gene_length Shortest pseudogene Length of shortest pseudogene bp 0 +60 genebuild.stats.ps_total_exons Exons in pseudogenes Total exon count in pseudogenes integer 0 +61 genebuild.stats.ps_total_introns Introns in pseudogenes Total intron count in pseudogenes integer 0 +62 genebuild.stats.ps_total_transcripts Transcripts in pseudogenes Total pseudogene RNA transcript count integer 0 +63 genebuild.stats.ps_transcripts_per_gene Average transcripts per pseudogene Mean pseudogene transcripts count per pseudogene float 0 +64 genebuild.stats.shortest_gene_length Shortest coding gene Length of shortest gene bp 0 +65 genebuild.start_date genebuild.start_date genebuild.start_date string 1 +66 genebuild.stats.total_coding_exons Exons in coding genes Total number of coding exons integer 0 +67 genebuild.stats.total_exons Exons in genes Total number of exons integer 0 +68 genebuild.stats.total_introns Introns in coding genes Total number of introns integer 0 +69 genebuild.stats.total_transcripts Transcripts in coding genes Total number of RNA transcripts integer 0 +70 genebuild.stats.transcripts_per_gene Average transcripts per coding gene Average number of transcripts per gene float 0 +71 genebuild.version genebuild.version genebuild.version string 1 +72 genebuild.sample_gene genebuild.sample_gene Sample Gene Data string 1 +73 genebuild.sample_location genebuild.sample_location Sample Location Data string 1 +74 assembly.stats.coverage_depth assembly.coverage_depth assembly.coverage_depth string 0 +75 assembly.web_accession_source assembly.web_accession_source assembly.web_accession_source string 0 +76 assembly.web_accession_type assembly.web_accession_type assembly.web_accession_type string 0 +77 genebuild.id genebuild.id genebuild.id string 0 +78 genebuild.stats.nc_average_intron_length Average intron length per 
non-coding transcript Mean intron length bp 0 +79 genebuild.projection_source_db genebuild.projection_source_db genebuild.projection_source_db string 0 +80 assembly.long_name assembly.long_name assembly.long_name string 0 +81 assembly.url_name assembly.url_name assembly.url_name string 0 +82 genebuild.havana_datafreeze_date genebuild.havana_datafreeze_date genebuild.havana_datafreeze_date string 0 +83 assembly.version assembly.version assembly.version string 0 +84 genebuild.provider_name genebuild.provider_name genebuild.provider_name string 1 +85 genebuild.provider_url genebuild.provider_url genebuild.provider_url string 1 +119 variation.short_variants Short variants Small-scale genetic variations integer 0 +120 variation.sample_variant variation.sample_variant variation.sample_variant string 0 +123 variation.short_variants_with_phenotype_assertions Short variation with phenotype assertion Short variation with phenotype assertion string 0 +161 compara.stats.homology_coverage compara.homology_coverage compara.homology_coverage float 0 +162 compara.homology_reference_species compara.homology_reference_species compara.homology_reference_species string 0 +163 regulation.stats.open_chromatin_count regulation.open_chromatin_count Number of open chromatin regions integer 0 +164 regulation.stats.promoter_count regulation.promoter_count Number of promoters integer 0 +165 regulation.stats.enhancer_count regulation.enhancer_count Number of enhancers integer 0 +166 regulation.stats.ctcf_count regulation.ctcf_count Number of CTCF binding sites integer 0 +167 regulation.stats.tfbs_count regulation.tfbs_count Number of regions enriched for transcription factor binding integer 0 +168 assembly.tol_id assembly.tol_id assembly.tol_id string 0 +169 genebuild.annotation_source genebuild.annotation_source genebuild.annotation_source string 1 +170 genebuild.stats.nc_total_exons Exons in non-coding genes Total exon count integer 0 +179 assembly.description assembly.description 
assembly.description string 0 +180 assembly.master_accession assembly.master_accession assembly.master_accession string 0 +181 assembly.alt_accession assembly.alt_accession assembly.alt_accession string 0 +182 dataset.build_start Dataset Build start date Dataset Build start date string 0 +183 dataset.build_end Dataset Build completed Dataset Build completed string 0 +197 genebuild.provider_version genebuild.provider_version genebuild.provider_version string 0 diff --git a/src/tests/databases/ensembl_genome_metadata/dataset.txt b/src/tests/databases/ensembl_genome_metadata/dataset.txt index 77181619..cd62f5be 100644 --- a/src/tests/databases/ensembl_genome_metadata/dataset.txt +++ b/src/tests/databases/ensembl_genome_metadata/dataset.txt @@ -1,41 +1,41 @@ -1 02104faf-3fee-4f28-b53c-605843dac941 assembly \N 2023-09-22 15:01:44.000000 GCA_000005845.2 1 1 Released \N -2 cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 genebuild EXT01 2023-09-22 15:01:44.000000 GCA_000005845.2_EXT01 1 2 Released \N -7 dc1b508e-f148-4a6f-b051-1a0c53142cf5 assembly \N 2023-09-22 15:02:00.000000 GCA_018473315.1 4 1 Released \N -9 45aec801-4fe7-4ac2-9afa-19aea2a8409e assembly \N 2023-09-22 15:02:01.000000 GCA_018469415.1 5 1 Released \N -11 5cda83f4-521c-4713-b2fe-b3ede2f1a51e assembly \N 2023-09-22 15:02:02.000000 GCA_018469875.1 6 1 Processed \N -13 3f9bf8d6-1514-4657-9f73-38a7354a80b8 assembly \N 2023-09-22 15:02:04.000000 GCA_018505825.1 7 1 Processed \N -14 53936715-1371-4343-95af-f39d06943db7 genebuild ENS01 2023-09-22 15:02:04.000000 GCA_018505825.1_ENS01 7 2 Processed \N -17 d641779c-2add-46ce-acf4-a2b6f15274b1 assembly \N 2023-09-22 15:02:11.000000 GCA_018852615.1 9 1 Processed \N -23 06b4892b-8e34-49bc-be84-8126e5a7cf93 assembly \N 2023-09-22 15:03:01.000000 GCA_000002765.2 14 1 Released \N -24 f202cd36-d0dc-40df-9dd6-a8218e0d1366 genebuild EXT01 2023-09-22 15:03:01.000000 GCA_000002765.2_EXT01 14 2 Released \N -37 6f8bd121-0345-4b77-9dc1-d567ac13447d assembly \N 2023-09-22 15:03:02.000000 
GCA_021950905.1 18 1 Processed \N -38 2ef7c056-847e-4742-a68b-18c3ece068aa genebuild ENS01 2023-09-22 15:03:02.000000 GCA_021950905.1_ENS01 18 2 Submitted \N -61 3474e0d6-d031-40bc-a4ae-230236886568 assembly \N 2023-09-22 15:03:22.000000 GCA_000001405.14 40 1 Released \N -62 e456d1c2-eea2-40f1-83ee-31912905b695 genebuild GENCODE19 2023-09-22 15:03:22.000000 GCA_000001405.14_GENCODE19 40 2 Released \N -147 999315f6-6d25-481f-a017-297f7e1490c8 assembly \N 2023-09-22 15:04:29.000000 GCA_900519105.1 79 1 Released \N -148 287a5483-55a4-46e6-a58b-a84ba0ddacd6 genebuild EXT01 2023-09-22 15:04:29.000000 GCA_900519105.1_EXT01 79 2 Released \N -171 c813f7b7-645c-45ac-8536-08190fd7daa0 assembly \N 2023-09-22 15:04:45.000000 GCA_000001405.29 92 1 Released \N -172 949defef-c4d2-4ab1-8a73-f41d2b3c7719 genebuild GENCODE44 2023-09-22 15:04:45.000000 GCA_000001405.29_GENCODE44 92 2 Released \N -177 3c67123a-e9e1-41ef-9014-2aadc8acf12a assembly \N 2023-09-22 15:04:50.000000 GCA_018505865.1 97 1 Processed \N -178 ed8ca387-38e3-4bfe-8b85-757a59b95126 genebuild ENS01 2023-09-22 15:04:50.000000 GCA_018505865.1_ENS01 97 2 Processed \N -183 8801edaf-86ec-4799-8fd4-a59077f04c05 assembly \N 2023-09-22 15:04:53.000000 GCA_018852605.1 100 1 Processed \N -184 11a0be7f-99ae-45d3-a004-dc19bb562330 genebuild ENS01 2023-09-22 15:04:53.000000 GCA_018852605.1_ENS01 100 2 Processed \N -197 fd27883a-e5d3-4502-b774-65d3cc4f4e18 assembly \N 2023-09-22 15:04:56.000000 GCA_018469925.1 107 1 Processed \N -249 786344d1-a71f-4bab-aa37-6ee315ed60a4 assembly \N 2023-09-22 15:05:37.000000 GCA_018469425.1 135 1 Processed \N -250 2bc8874e-6672-4293-89d6-0b837005177c genebuild ENS01 2023-09-22 15:05:37.000000 GCA_018469425.1_ENS01 135 2 Processed \N -337 eb451e00-7abb-4462-82bf-f29f6ed3dc1b assembly \N 2023-09-22 15:06:39.000000 GCA_021951015.1 179 1 Processed \N -338 bd63a676-45ff-494a-b26f-2b779cb6c180 genebuild ENS01 2023-09-22 15:06:39.000000 GCA_021951015.1_ENS01 179 2 Processed \N -347 
6790a2a6-b178-4ab2-a12b-aad3d5511713 assembly \N 2023-09-22 15:06:43.000000 GCA_018473295.1 185 1 Released \N -348 23d52e01-2e3d-495f-b345-df41c605caa9 genebuild ENS01 2023-09-22 15:06:43.000000 GCA_018473295.1_ENS01 185 2 Released \N -401 3b58ee8a-8f8d-4dfe-bb58-44c2ed57f229 assembly \N 2023-09-22 15:06:55.000000 GCA_000146045.2 214 1 Released \N -402 cfef61f8-7e24-4ed6-945f-baca1b2664a3 genebuild EXT01 2023-09-22 15:06:55.000000 GCA_000146045.2_EXT01 214 2 Released \N -405 6c1896f9-10dd-423e-a1ff-db8b5815cb66 assembly \N 2023-09-22 15:06:58.000000 GCA_000002985.3 217 1 Released \N -406 ea69f164-cc77-4671-bf97-c7f537dc400e genebuild EXT01 2023-09-22 15:06:58.000000 GCA_000002985.3_EXT01 217 2 Released \N -888 9d717ead-ffe0-4fc1-b58c-3c057b754021 genebuild ENS01 2023-11-07 11:18:55.000000 GCA_018473315.1_ENS01 4 2 Released \N -890 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 genebuild ENS01 2023-11-07 11:18:57.000000 GCA_018469415.1_ENS01 5 2 Released \N -892 664088c7-356e-418c-adb2-15945b7ebc4b genebuild ENS01 2023-11-07 11:19:00.000000 GCA_018469875.1_ENS01 6 2 Processed \N -896 f9690d7e-26c1-459d-8102-0c4a1a468806 genebuild ENS01 2023-11-07 11:19:16.000000 GCA_018852615.1_ENS01 9 2 Processed \N -1006 66db32ae-974f-480c-a60b-63cc49d00f68 genebuild ENS01 2023-11-07 11:22:53.000000 GCA_018469925.1_ENS01 107 2 Processed \N +1 02104faf-3fee-4f28-b53c-605843dac941 assembly \N 2023-09-22 15:01:44 GCA_000005845.2 1 1 Released \N +2 cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 genebuild EXT01 2023-09-22 15:01:44 GCA_000005845.2_EXT01 1 2 Released \N +7 dc1b508e-f148-4a6f-b051-1a0c53142cf5 assembly \N 2023-09-22 15:02:00 GCA_018473315.1 4 1 Released \N +9 45aec801-4fe7-4ac2-9afa-19aea2a8409e assembly \N 2023-09-22 15:02:01 GCA_018469415.1 5 1 Released \N +11 5cda83f4-521c-4713-b2fe-b3ede2f1a51e assembly \N 2023-09-22 15:02:02 GCA_018469875.1 6 1 Processed \N +13 3f9bf8d6-1514-4657-9f73-38a7354a80b8 assembly \N 2023-09-22 15:02:04 GCA_018505825.1 7 1 Processed \N +14 
53936715-1371-4343-95af-f39d06943db7 genebuild ENS01 2023-09-22 15:02:04 GCA_018505825.1_ENS01 7 2 Processed \N +17 d641779c-2add-46ce-acf4-a2b6f15274b1 assembly \N 2023-09-22 15:02:11 GCA_018852615.1 9 1 Processed \N +23 06b4892b-8e34-49bc-be84-8126e5a7cf93 assembly \N 2023-09-22 15:03:01 GCA_000002765.2 14 1 Released \N +24 f202cd36-d0dc-40df-9dd6-a8218e0d1366 genebuild EXT01 2023-09-22 15:03:01 GCA_000002765.2_EXT01 14 2 Released \N +37 6f8bd121-0345-4b77-9dc1-d567ac13447d assembly \N 2023-09-22 15:03:02 GCA_021950905.1 18 1 Processed \N +38 2ef7c056-847e-4742-a68b-18c3ece068aa genebuild ENS01 2023-09-22 15:03:02 GCA_021950905.1_ENS01 18 2 Submitted \N +61 3474e0d6-d031-40bc-a4ae-230236886568 assembly \N 2023-09-22 15:03:22 GCA_000001405.14 40 1 Released \N +62 e456d1c2-eea2-40f1-83ee-31912905b695 genebuild GENCODE19 2023-09-22 15:03:22 GCA_000001405.14_GENCODE19 40 2 Released \N +147 999315f6-6d25-481f-a017-297f7e1490c8 assembly \N 2023-09-22 15:04:29 GCA_900519105.1 79 1 Released \N +148 287a5483-55a4-46e6-a58b-a84ba0ddacd6 genebuild EXT01 2023-09-22 15:04:29 GCA_900519105.1_EXT01 79 2 Released \N +171 c813f7b7-645c-45ac-8536-08190fd7daa0 assembly \N 2023-09-22 15:04:45 GCA_000001405.29 92 1 Released \N +172 949defef-c4d2-4ab1-8a73-f41d2b3c7719 genebuild GENCODE44 2023-09-22 15:04:45 GCA_000001405.29_GENCODE44 92 2 Released \N +177 3c67123a-e9e1-41ef-9014-2aadc8acf12a assembly \N 2023-09-22 15:04:50 GCA_018505865.1 97 1 Processed \N +178 ed8ca387-38e3-4bfe-8b85-757a59b95126 genebuild ENS01 2023-09-22 15:04:50 GCA_018505865.1_ENS01 97 2 Processed \N +183 8801edaf-86ec-4799-8fd4-a59077f04c05 assembly \N 2023-09-22 15:04:53 GCA_018852605.1 100 1 Processed \N +184 11a0be7f-99ae-45d3-a004-dc19bb562330 genebuild ENS01 2023-09-22 15:04:53 GCA_018852605.1_ENS01 100 2 Processed \N +197 fd27883a-e5d3-4502-b774-65d3cc4f4e18 assembly \N 2023-09-22 15:04:56 GCA_018469925.1 107 1 Processed \N +249 786344d1-a71f-4bab-aa37-6ee315ed60a4 assembly \N 2023-09-22 15:05:37 
GCA_018469425.1 135 1 Processed \N +250 2bc8874e-6672-4293-89d6-0b837005177c genebuild ENS01 2023-09-22 15:05:37 GCA_018469425.1_ENS01 135 2 Processed \N +337 eb451e00-7abb-4462-82bf-f29f6ed3dc1b assembly \N 2023-09-22 15:06:39 GCA_021951015.1 179 1 Processed \N +338 bd63a676-45ff-494a-b26f-2b779cb6c180 genebuild ENS01 2023-09-22 15:06:39 GCA_021951015.1_ENS01 179 2 Processed \N +347 6790a2a6-b178-4ab2-a12b-aad3d5511713 assembly \N 2023-09-22 15:06:43 GCA_018473295.1 185 1 Released \N +348 23d52e01-2e3d-495f-b345-df41c605caa9 genebuild ENS01 2023-09-22 15:06:43 GCA_018473295.1_ENS01 185 2 Released \N +401 3b58ee8a-8f8d-4dfe-bb58-44c2ed57f229 assembly \N 2023-09-22 15:06:55 GCA_000146045.2 214 1 Released \N +402 cfef61f8-7e24-4ed6-945f-baca1b2664a3 genebuild EXT01 2023-09-22 15:06:55 GCA_000146045.2_EXT01 214 2 Released \N +405 6c1896f9-10dd-423e-a1ff-db8b5815cb66 assembly \N 2023-09-22 15:06:58 GCA_000002985.3 217 1 Released \N +406 ea69f164-cc77-4671-bf97-c7f537dc400e genebuild EXT01 2023-09-22 15:06:58 GCA_000002985.3_EXT01 217 2 Released \N +888 9d717ead-ffe0-4fc1-b58c-3c057b754021 genebuild ENS01 2023-11-07 11:18:55 GCA_018473315.1_ENS01 4 2 Released \N +890 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 genebuild ENS01 2023-11-07 11:18:57 GCA_018469415.1_ENS01 5 2 Released \N +892 664088c7-356e-418c-adb2-15945b7ebc4b genebuild ENS01 2023-11-07 11:19:00 GCA_018469875.1_ENS01 6 2 Processed \N +896 f9690d7e-26c1-459d-8102-0c4a1a468806 genebuild ENS01 2023-11-07 11:19:16 GCA_018852615.1_ENS01 9 2 Processed \N +1006 66db32ae-974f-480c-a60b-63cc49d00f68 genebuild ENS01 2023-11-07 11:22:53 GCA_018469925.1_ENS01 107 2 Processed \N 1391 bf1f5064-8520-4f19-84e4-449aa6c1c1e2 variation 1.0 2023-11-09 12:49:25.273751 GRCh38 673 3 Released \N 1392 5b869bbb-098f-4827-afc0-532a2bc88903 variation 1.0 2023-11-09 12:49:25.797822 GRCh37 674 3 Released \N 1393 5717d47e-dad9-4c2d-b015-c055bc93e831 evidence 1.0 2023-11-09 12:49:26.267728 GRCh38 673 4 Released \N @@ -84,416 +84,416 @@ 8130 
5b618784-a5ff-46cc-8102-b082ffb6e447 compara_homologies 1.0 2024-02-06 21:41:47.150011 Compara homologies 368 6 Submitted \N 8392 b6472939-9e49-4d46-b93e-304910acabf3 compara_homologies 1.0 2024-02-06 21:44:00.982498 Compara homologies 4352 6 Processed \N 8661 a5bf42be-63c1-4616-9af1-bc03aea92643 compara_homologies 1.0 2024-02-06 21:46:14.099319 Compara homologies 443 6 Submitted \N -8662 af8eee44-ca56-4baf-a5f1-ad60d1165f3a genebuild_compute ENS01 2024-04-24 10:18:12.000000 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 8 Released 348 -8663 a0fef323-23b9-4d4c-87b3-42f290dffbc7 xrefs ENS01 2024-04-24 10:18:12.000000 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 13 Released 8662 -8664 5d12b439-f994-408b-a7cc-88a0ce2a1c5e protein_features ENS01 2024-04-24 10:18:12.000000 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 14 Released 8662 -8665 fd7c81b9-bd96-4074-a78f-ce86059d3a55 alpha_fold ENS01 2024-04-24 10:18:12.000000 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 15 Released 8662 -8666 e4630a33-1d85-4a93-9c3d-ba23f531e900 genebuild_files ENS01 2024-04-24 10:18:12.000000 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 9 Released 348 -8667 5dc9ebba-a6f0-4380-9f9b-5735855c8c0b blast ENS01 2024-04-24 10:18:12.000000 From e4630a33-1d85-4a93-9c3d-ba23f531e900 185 16 Released 8666 -8668 12304159-93ac-4597-bbfb-fc487a580bd2 ftp_dumps ENS01 2024-04-24 10:18:12.000000 From e4630a33-1d85-4a93-9c3d-ba23f531e900 185 17 Released 8666 -8669 aaf2c600-821f-4ade-a3e7-fde21c333060 genebuild_web ENS01 2024-04-24 10:18:12.000000 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 11 Released 348 -8670 97e9f169-4ba2-4d44-b958-7fc3233c2c24 thoas_dumps ENS01 2024-04-24 10:18:12.000000 From aaf2c600-821f-4ade-a3e7-fde21c333060 185 18 Released 8669 -8671 45c72ec3-7b5e-4b5e-83f8-1fc5790b1ad4 browser_files ENS01 2024-04-24 10:18:12.000000 From aaf2c600-821f-4ade-a3e7-fde21c333060 185 20 Released 8669 -8672 7a33c596-7883-4638-86d4-9aa4ac266110 checksums ENS01 2024-04-24 10:18:12.000000 From 
aaf2c600-821f-4ade-a3e7-fde21c333060 185 23 Released 8669 -8673 d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c genebuild_web ENS01 2024-04-24 10:18:12.000000 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 12 Released 348 -8674 1ff09568-0971-4907-b023-2e81b9d73e61 thoas_load ENS01 2024-04-24 10:18:12.000000 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 19 Released 8673 -8675 ff3d51eb-154b-4665-887c-c406cc3bc78b genebuild_track ENS01 2024-04-24 10:18:12.000000 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 21 Released 8673 -8676 76bc09f9-ab83-49ad-9b14-b81dd2ee5eb0 refget_load ENS01 2024-04-24 10:18:12.000000 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 24 Released 8673 -8677 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 genebuild_compute ENS01 2024-04-24 10:18:12.000000 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 8 Released 888 -8678 8a49f103-b405-4f54-8714-980007cfe776 xrefs ENS01 2024-04-24 10:18:12.000000 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 13 Released 8677 -8679 129e0e2b-b778-47d5-9252-822af8adbf5b protein_features ENS01 2024-04-24 10:18:12.000000 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 14 Released 8677 -8680 d811d1ff-8e73-4215-b622-4da5d1ae68bc alpha_fold ENS01 2024-04-24 10:18:12.000000 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 15 Released 8677 -8681 b57a6524-00c5-423a-b569-57e2039d5f75 genebuild_files ENS01 2024-04-24 10:18:12.000000 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 9 Released 888 -8682 90d32255-8476-4d33-8aca-cd05b65f6b6f blast ENS01 2024-04-24 10:18:12.000000 From b57a6524-00c5-423a-b569-57e2039d5f75 4 16 Released 8681 -8683 3243f1ac-38aa-412b-9a2a-c5edf0336a2f ftp_dumps ENS01 2024-04-24 10:18:12.000000 From b57a6524-00c5-423a-b569-57e2039d5f75 4 17 Released 8681 -8684 c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 genebuild_web ENS01 2024-04-24 10:18:12.000000 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 11 Released 888 -8685 98cb07a1-2a1d-496a-a0a7-168662eda07b thoas_dumps ENS01 2024-04-24 10:18:12.000000 From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 18 Released 
8684 -8686 3b9fee1b-0c94-4345-9599-919ad721a7da browser_files ENS01 2024-04-24 10:18:12.000000 From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 20 Released 8684 -8687 8978bd71-c1b1-40b5-8628-1dd84115badd checksums ENS01 2024-04-24 10:18:12.000000 From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 23 Released 8684 -8688 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 genebuild_web ENS01 2024-04-24 10:18:12.000000 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 12 Released 888 -8689 53979850-c127-4a85-a680-9183978bb250 thoas_load ENS01 2024-04-24 10:18:12.000000 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 19 Released 8688 -8690 2b8d9066-8a02-4d47-ab29-c39f43ccfc53 genebuild_track ENS01 2024-04-24 10:18:12.000000 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 21 Released 8688 -8691 4d418be3-ea1b-4f36-afa4-c40d113b3910 refget_load ENS01 2024-04-24 10:18:12.000000 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 24 Released 8688 -8692 3cfe16ce-8a7e-49c8-b719-2affce984771 genebuild_compute ENS01 2024-04-24 10:18:13.000000 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 8 Released 890 -8693 f5eeacaa-8ee4-4739-8aed-a6aeaaadd13e xrefs ENS01 2024-04-24 10:18:13.000000 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 13 Released 8692 -8694 808c43bc-7950-4e32-935b-ef13f1a869c1 protein_features ENS01 2024-04-24 10:18:13.000000 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 14 Released 8692 -8695 7413ae88-d840-47ca-a602-713e03e6b123 alpha_fold ENS01 2024-04-24 10:18:13.000000 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 15 Released 8692 -8696 6a5f98e4-2f2f-4c88-9172-d02dc623c42f genebuild_files ENS01 2024-04-24 10:18:13.000000 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 9 Released 890 -8697 cd9f881d-1716-425d-9879-472193cbf337 blast ENS01 2024-04-24 10:18:13.000000 From 6a5f98e4-2f2f-4c88-9172-d02dc623c42f 5 16 Released 8696 -8698 84d2b150-1e5b-49b2-a35c-14596d503ae0 ftp_dumps ENS01 2024-04-24 10:18:13.000000 From 6a5f98e4-2f2f-4c88-9172-d02dc623c42f 5 17 Released 8696 -8699 5b63c887-d867-411c-b138-536ed0c430de genebuild_web 
ENS01 2024-04-24 10:18:13.000000 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 11 Released 890 -8700 3a7c3603-acfa-4803-8c0c-c413501c6180 thoas_dumps ENS01 2024-04-24 10:18:13.000000 From 5b63c887-d867-411c-b138-536ed0c430de 5 18 Released 8699 -8701 6a52d021-242b-4e7a-84aa-b2f08d6b1e89 browser_files ENS01 2024-04-24 10:18:13.000000 From 5b63c887-d867-411c-b138-536ed0c430de 5 20 Released 8699 -8702 30f5310e-7e78-4027-aa32-82de71946e20 checksums ENS01 2024-04-24 10:18:13.000000 From 5b63c887-d867-411c-b138-536ed0c430de 5 23 Released 8699 -8703 f67689b2-8c52-4c3e-89da-70520e0613d8 genebuild_web ENS01 2024-04-24 10:18:13.000000 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 12 Released 890 -8704 e4e75159-a56c-4a38-ac81-b74f7e89c022 thoas_load ENS01 2024-04-24 10:18:13.000000 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 19 Released 8703 -8705 981eb155-b2c5-4571-955f-f2d7574ef5eb genebuild_track ENS01 2024-04-24 10:18:13.000000 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 21 Released 8703 -8706 078f49f0-cc6e-4e4a-bfee-f8fc240a635a refget_load ENS01 2024-04-24 10:18:13.000000 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 24 Released 8703 -8707 705c3da3-186f-42f7-bd2d-795285e9b246 evidence 1.0 2024-04-24 10:18:13.000000 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 4 Released 1475 -8708 bb98fc3a-30af-41b2-9dac-29d580b42b68 short_variant 1.0 2024-04-24 10:18:13.000000 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 5 Released 1475 -8709 4b9b9585-8570-4f26-8819-a45f92df8d23 variation_ftp 1.0 2024-04-24 10:18:13.000000 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 29 Released 1475 -8710 7ca071a6-2ab3-4535-8c52-a21cb012fe0c browser_files 1.0 2024-04-24 10:18:13.000000 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 31 Released 1475 -8711 9d5325d8-49ba-4606-aaa2-c7269a19f5f7 variation_track 1.0 2024-04-24 10:18:13.000000 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 32 Released 1475 -8712 d6d6f12a-c806-4db0-99dd-a667fbd7c191 evidence 1.0 2024-04-24 10:18:13.000000 From 
dd28ea84-6890-4af8-bd2b-2caa9917f221 608 4 Released 1507 -8713 8ed8f4b5-423e-44f1-a01d-bf2c19857374 short_variant 1.0 2024-04-24 10:18:13.000000 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 5 Released 1507 -8714 e17b4956-cf44-4ce3-bad6-c141fae5148c variation_ftp 1.0 2024-04-24 10:18:13.000000 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 29 Released 1507 -8715 48876d82-b51c-41d5-818f-5af04bcf8fc3 browser_files 1.0 2024-04-24 10:18:13.000000 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 31 Released 1507 -8716 c2dab1e0-5d72-4d50-9ec7-c8e90746ec65 variation_track 1.0 2024-04-24 10:18:13.000000 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 32 Released 1507 -8717 6d799ad1-5fe6-477f-8ddb-6a16ab3ea33a evidence 1.0 2024-04-24 10:18:13.000000 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 4 Released 1509 -8718 c06b0534-4708-4cec-913b-8b354fda0c6b short_variant 1.0 2024-04-24 10:18:13.000000 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 5 Released 1509 -8719 fff94973-5318-4821-9afa-3fd2fc0b7a4e variation_ftp 1.0 2024-04-24 10:18:13.000000 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 29 Released 1509 -8720 b5a2b993-1252-4495-bdf3-9eae0322cf39 browser_files 1.0 2024-04-24 10:18:13.000000 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 31 Released 1509 -8721 db33d965-62b4-4d83-9738-ade69df4cff5 variation_track 1.0 2024-04-24 10:18:13.000000 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 32 Released 1509 -8722 7780e084-b3ca-4df4-ba40-5a8bfea6e9a1 homology_compute 1.0 2024-04-24 10:18:13.000000 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 25 Released 2276 -8723 71fe75e0-50ca-4b74-a8a6-3d8d016e4227 homology_load 1.0 2024-04-24 10:18:13.000000 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 26 Released 2276 -8724 10879879-9a3a-4dfd-b0eb-c06699f0aada homology_ftp 1.0 2024-04-24 10:18:13.000000 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 27 Released 2276 -8725 2925c0ee-f987-4102-b792-4904c7b98d19 homology_compute 1.0 2024-04-24 10:18:13.000000 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 
383 25 Released 2408 -8726 6f0b5633-abef-4daf-be84-489a979f8b0b homology_load 1.0 2024-04-24 10:18:13.000000 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 383 26 Released 2408 -8727 3e44f562-0cae-4165-9ef5-75fd6593d2e1 homology_ftp 1.0 2024-04-24 10:18:13.000000 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 383 27 Released 2408 -8728 80a73415-7eda-4c22-80a1-93508c1ebc03 homology_compute 1.0 2024-04-24 10:18:13.000000 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 25 Released 2482 -8729 11185ed4-ea77-406e-bbbd-829601db2463 homology_load 1.0 2024-04-24 10:18:13.000000 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 26 Released 2482 -8730 745f6eba-367b-4db9-92f7-7353603ce4ce homology_ftp 1.0 2024-04-24 10:18:13.000000 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 27 Released 2482 -8731 1dcbaf3e-5179-434c-beae-1416149f30cf genebuild_compute ENS01 2024-04-24 10:18:13.000000 From 53936715-1371-4343-95af-f39d06943db7 7 8 Processed 14 -8732 d340ac5b-2f9b-44d7-bab8-99ff17516053 xrefs ENS01 2024-04-24 10:18:13.000000 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 13 Processed 8731 -8733 5a2fd22b-31ac-4e69-ad20-137b6d297cf8 protein_features ENS01 2024-04-24 10:18:13.000000 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 14 Processed 8731 -8734 7f988fe4-f4d5-4bd7-8516-2cfc767d7ec6 alpha_fold ENS01 2024-04-24 10:18:13.000000 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 15 Processed 8731 -8735 81bdc51c-4c4f-4e07-850b-562d0d964269 genebuild_files ENS01 2024-04-24 10:18:13.000000 From 53936715-1371-4343-95af-f39d06943db7 7 9 Processed 14 -8736 871842a4-566d-4b44-b883-caea737dbe70 blast ENS01 2024-04-24 10:18:13.000000 From 81bdc51c-4c4f-4e07-850b-562d0d964269 7 16 Processed 8735 -8737 5e7deba4-4293-4d6a-b954-73dfdc3be208 ftp_dumps ENS01 2024-04-24 10:18:13.000000 From 81bdc51c-4c4f-4e07-850b-562d0d964269 7 17 Processed 8735 -8738 2563efa7-e2be-401a-a89f-79ea71c17452 genebuild_web ENS01 2024-04-24 10:18:13.000000 From 53936715-1371-4343-95af-f39d06943db7 7 11 Processed 14 -8739 
430221cd-df0d-4727-bd3f-8bdd1e69fb62 thoas_dumps ENS01 2024-04-24 10:18:13.000000 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 18 Processed 8738 -8740 c918aa39-6dd4-4032-87da-5282e90c4142 browser_files ENS01 2024-04-24 10:18:13.000000 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 20 Processed 8738 -8741 ce889e08-ab8b-4420-891b-9fb3ab5f4e81 checksums ENS01 2024-04-24 10:18:13.000000 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 23 Processed 8738 -8742 96331294-fb65-457b-a3bd-5e41f1818044 genebuild_web ENS01 2024-04-24 10:18:13.000000 From 53936715-1371-4343-95af-f39d06943db7 7 12 Processed 14 -8743 a31eb1a1-1ebe-478e-b6a5-fd04ffbb0e3c thoas_load ENS01 2024-04-24 10:18:13.000000 From 96331294-fb65-457b-a3bd-5e41f1818044 7 19 Processed 8742 -8744 a1fae0e7-c124-4849-bfe5-e68c583b7826 genebuild_track ENS01 2024-04-24 10:18:13.000000 From 96331294-fb65-457b-a3bd-5e41f1818044 7 21 Processed 8742 -8745 145ad879-9c5e-4833-9645-ec0e9fb35079 refget_load ENS01 2024-04-24 10:18:13.000000 From 96331294-fb65-457b-a3bd-5e41f1818044 7 24 Processed 8742 -8746 8d55a4f8-0550-4770-aac2-c7963bfa1176 genebuild_compute ENS01 2024-04-24 10:18:13.000000 From 2bc8874e-6672-4293-89d6-0b837005177c 135 8 Processed 250 -8747 d92da251-954c-417b-8e89-03c677a60553 xrefs ENS01 2024-04-24 10:18:13.000000 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 13 Processed 8746 -8748 819805e5-7aeb-437c-bb55-3918a7c94e48 protein_features ENS01 2024-04-24 10:18:13.000000 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 14 Processed 8746 -8749 9c57cc1b-ee2b-4adf-968b-8b6cec556f95 alpha_fold ENS01 2024-04-24 10:18:13.000000 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 15 Processed 8746 -8750 d4c1e848-8a50-4aae-95fe-5efb85833613 genebuild_files ENS01 2024-04-24 10:18:13.000000 From 2bc8874e-6672-4293-89d6-0b837005177c 135 9 Processed 250 -8751 24fbb4e6-45a2-4bb9-a1b7-2b14fafc5135 blast ENS01 2024-04-24 10:18:13.000000 From d4c1e848-8a50-4aae-95fe-5efb85833613 135 16 Processed 8750 -8752 32b8b190-ccb8-4d4e-8080-686fdb2bf853 
ftp_dumps ENS01 2024-04-24 10:18:13.000000 From d4c1e848-8a50-4aae-95fe-5efb85833613 135 17 Processed 8750 -8753 ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 genebuild_web ENS01 2024-04-24 10:18:13.000000 From 2bc8874e-6672-4293-89d6-0b837005177c 135 11 Processed 250 -8754 b41d8022-6a58-4a2b-a0fb-6776a722b7f0 thoas_dumps ENS01 2024-04-24 10:18:13.000000 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 18 Processed 8753 -8755 9d4373b8-4248-4afe-ab14-d6c14c1b19ea browser_files ENS01 2024-04-24 10:18:13.000000 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 20 Processed 8753 -8756 cc668846-b60c-4544-b151-84e77308595e checksums ENS01 2024-04-24 10:18:13.000000 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 23 Processed 8753 -8757 7f200073-0c6e-4487-ac0b-f5cb160151f5 genebuild_web ENS01 2024-04-24 10:18:14.000000 From 2bc8874e-6672-4293-89d6-0b837005177c 135 12 Processed 250 -8758 4bedae91-0f82-478f-bba4-23f8dcb83ef0 thoas_load ENS01 2024-04-24 10:18:14.000000 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 19 Processed 8757 -8759 8389b690-fd8d-40bf-8bb7-05254261be6b genebuild_track ENS01 2024-04-24 10:18:14.000000 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 21 Processed 8757 -8760 e6f310ea-3a25-4adb-aa01-f514fe4d4183 refget_load ENS01 2024-04-24 10:18:14.000000 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 24 Processed 8757 -8761 05125758-03b0-43c3-b4eb-973f05293e42 genebuild_compute ENS01 2024-04-24 10:18:14.000000 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 8 Processed 892 -8762 055d833f-a5e3-4dc2-bd19-0827d05a576c xrefs ENS01 2024-04-24 10:18:14.000000 From 05125758-03b0-43c3-b4eb-973f05293e42 6 13 Processed 8761 -8763 966759bd-d77b-4f97-8502-307ba251adc8 protein_features ENS01 2024-04-24 10:18:14.000000 From 05125758-03b0-43c3-b4eb-973f05293e42 6 14 Processed 8761 -8764 154cbdc9-e1de-4d40-9e99-21a6d18cacaf alpha_fold ENS01 2024-04-24 10:18:14.000000 From 05125758-03b0-43c3-b4eb-973f05293e42 6 15 Processed 8761 -8765 9ab20e16-0d40-4145-8ad5-32e498b4cff4 genebuild_files ENS01 
2024-04-24 10:18:14.000000 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 9 Processed 892 -8766 0a975f98-3a5d-4270-9770-73cf4c48107b blast ENS01 2024-04-24 10:18:14.000000 From 9ab20e16-0d40-4145-8ad5-32e498b4cff4 6 16 Processed 8765 -8767 4823d7a3-b2c8-4220-8652-20436a20d9ca ftp_dumps ENS01 2024-04-24 10:18:14.000000 From 9ab20e16-0d40-4145-8ad5-32e498b4cff4 6 17 Processed 8765 -8768 a36bfaba-8751-403c-9024-ac00809cb748 genebuild_web ENS01 2024-04-24 10:18:14.000000 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 11 Processed 892 -8769 d781cd94-54f7-403a-8a03-1114db2ccfe6 thoas_dumps ENS01 2024-04-24 10:18:14.000000 From a36bfaba-8751-403c-9024-ac00809cb748 6 18 Processed 8768 -8770 79d32d79-6346-4453-83d1-517ed275840b browser_files ENS01 2024-04-24 10:18:14.000000 From a36bfaba-8751-403c-9024-ac00809cb748 6 20 Processed 8768 -8771 cd5a8672-9177-4e08-8eb2-8a770ee58ce7 checksums ENS01 2024-04-24 10:18:14.000000 From a36bfaba-8751-403c-9024-ac00809cb748 6 23 Processed 8768 -8772 f6561cb1-4cae-47e7-ac63-ad2151f4b927 genebuild_web ENS01 2024-04-24 10:18:14.000000 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 12 Processed 892 -8773 161453c1-3e3c-423e-a4f4-0f048d7c134c thoas_load ENS01 2024-04-24 10:18:14.000000 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 19 Processed 8772 -8774 7f771283-0afa-4703-b534-3844646bc8e1 genebuild_track ENS01 2024-04-24 10:18:14.000000 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 21 Processed 8772 -8775 00f2c284-5eea-43c0-a225-6bcc319a0b7f refget_load ENS01 2024-04-24 10:18:14.000000 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 24 Processed 8772 -8776 da20e2b5-1809-494e-893f-7fb90e8032a1 genebuild_compute ENS01 2024-04-24 10:18:14.000000 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 8 Processed 1006 -8777 8ec9f005-91d7-4015-be09-7b61b6d62c54 xrefs ENS01 2024-04-24 10:18:14.000000 From da20e2b5-1809-494e-893f-7fb90e8032a1 107 13 Processed 8776 -8778 fdd6e615-8ac7-41fc-b8b2-aff7aeb9c99a protein_features ENS01 2024-04-24 10:18:14.000000 From 
da20e2b5-1809-494e-893f-7fb90e8032a1 107 14 Processed 8776 -8779 f6d9a2a5-d744-4a90-a9b4-8656108bf921 alpha_fold ENS01 2024-04-24 10:18:14.000000 From da20e2b5-1809-494e-893f-7fb90e8032a1 107 15 Processed 8776 -8780 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd genebuild_files ENS01 2024-04-24 10:18:14.000000 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 9 Processed 1006 -8781 656bdb4a-c2f0-4ff1-93a8-6a780ba47e26 blast ENS01 2024-04-24 10:18:14.000000 From 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd 107 16 Processed 8780 -8782 503fe667-0304-45db-ad36-860b9967290e ftp_dumps ENS01 2024-04-24 10:18:14.000000 From 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd 107 17 Processed 8780 -8783 472c2bcc-3de5-446b-8b05-e33c3975acdb genebuild_web ENS01 2024-04-24 10:18:14.000000 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 11 Processed 1006 -8784 83f43633-77ce-4164-8ec4-655707a4029d thoas_dumps ENS01 2024-04-24 10:18:14.000000 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 18 Processed 8783 -8785 722acf20-f184-4ac4-b9ad-947de51b051e browser_files ENS01 2024-04-24 10:18:14.000000 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 20 Processed 8783 -8786 8e15dcb4-cbd2-4971-a155-8d5956a38c41 checksums ENS01 2024-04-24 10:18:14.000000 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 23 Processed 8783 -8787 aa4b8122-4480-4595-b2bf-c8c8f51537ce genebuild_web ENS01 2024-04-24 10:18:14.000000 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 12 Processed 1006 -8788 4e7f51fc-b5f6-4aa3-ab31-b22a23d080b2 thoas_load ENS01 2024-04-24 10:18:14.000000 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 19 Processed 8787 -8789 9770c787-1b91-4e65-bf76-5dc0d1c5c75f genebuild_track ENS01 2024-04-24 10:18:14.000000 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 21 Processed 8787 -8790 0f140a16-de49-4566-a41d-8e1ccbc8f5d0 refget_load ENS01 2024-04-24 10:18:14.000000 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 24 Processed 8787 -8791 e0a75f02-6ac6-4dfa-8196-50cb4803a9b8 evidence 1.0 2024-04-24 10:18:14.000000 From 
5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 4 Processed 1464 -8792 04892d42-1689-4e76-a158-717f1c773a3d short_variant 1.0 2024-04-24 10:18:14.000000 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 5 Processed 1464 -8793 4b9774b2-eabd-4981-b098-521b5b8a13a0 browser_files 1.0 2024-04-24 10:18:14.000000 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 31 Processed 1464 -8794 f8c7383b-aaac-41cf-9ac8-dce5f99b5338 variation_ftp 1.0 2024-04-24 10:18:14.000000 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 29 Processed 1464 -8795 3aeb9633-f9a6-4693-b51c-875935bb3e16 variation_track 1.0 2024-04-24 10:18:14.000000 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 32 Processed 1464 -8796 628143db-4adb-4086-9168-1f7e875bbafe evidence 1.0 2024-04-24 10:18:14.000000 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 4 Processed 1470 -8797 0c150044-3236-4cbd-ba06-19e19d10000a short_variant 1.0 2024-04-24 10:18:14.000000 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 5 Processed 1470 -8798 e488f2c0-8926-4cd4-a3f4-5085885273ba browser_files 1.0 2024-04-24 10:18:14.000000 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 31 Processed 1470 -8799 0b1d6792-050b-461b-b6e7-8013f03caace variation_ftp 1.0 2024-04-24 10:18:14.000000 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 29 Processed 1470 -8800 b55eedc6-c8e9-46f9-8f3a-5487b590d563 variation_track 1.0 2024-04-24 10:18:14.000000 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 32 Processed 1470 -8801 4f615d55-8b10-4004-88c0-169e1016032c evidence 1.0 2024-04-24 10:18:14.000000 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 4 Processed 1491 -8802 9bdb03db-aed4-41af-be5c-7912d5cf82ad short_variant 1.0 2024-04-24 10:18:14.000000 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 5 Processed 1491 -8803 1bc869fb-7586-4394-8da3-40502ce06f28 browser_files 1.0 2024-04-24 10:18:14.000000 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 31 Processed 1491 -8804 0d05a7a3-6a4d-4f61-bcfe-76728865fe0a variation_ftp 1.0 2024-04-24 10:18:14.000000 From 
a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 29 Processed 1491 -8805 3f87e8e7-21f6-441c-83a4-a5c7204c5320 variation_track 1.0 2024-04-24 10:18:14.000000 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 32 Processed 1491 -8806 206879dd-6d27-451d-b5aa-8330696afc1a evidence 1.0 2024-04-24 10:18:14.000000 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 4 Processed 1494 -8807 1f82e0d6-2232-4dc7-9dba-c1c62b42c24f short_variant 1.0 2024-04-24 10:18:14.000000 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 5 Processed 1494 -8808 4c9ad5d4-3a35-40b7-ba30-a09b958a205b browser_files 1.0 2024-04-24 10:18:14.000000 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 31 Processed 1494 -8809 1c37a8bf-43d6-4fc8-98d8-b4e0d3a31931 variation_ftp 1.0 2024-04-24 10:18:14.000000 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 29 Processed 1494 -8810 460598d9-1266-4652-8ef8-536e8c1391fe variation_track 1.0 2024-04-24 10:18:14.000000 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 32 Processed 1494 -8811 6c28c1ae-af07-4199-9da9-b48ae0fcb7b7 homology_compute 1.0 2024-04-24 10:18:14.000000 From 35f56606-9186-432f-b033-6e7204708f3b 294 25 Processed 2319 -8812 4d9b87ea-6313-4aee-ad8b-ab2a2813e22a homology_load 1.0 2024-04-24 10:18:14.000000 From 35f56606-9186-432f-b033-6e7204708f3b 294 26 Processed 2319 -8813 9e3a4f8a-95f2-415c-88ec-5da6eb3e7c10 homology_ftp 1.0 2024-04-24 10:18:14.000000 From 35f56606-9186-432f-b033-6e7204708f3b 294 27 Processed 2319 -8814 cb8f93b0-b903-49c8-ad48-1cc4f4ceee6a homology_compute 1.0 2024-04-24 10:18:14.000000 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 25 Processed 6593 -8815 9045fc64-a2f2-4cfa-b10e-8b55e6e631e2 homology_load 1.0 2024-04-24 10:18:14.000000 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 26 Processed 6593 -8816 b9c88135-824b-425b-ab1b-156cd58a0bde homology_ftp 1.0 2024-04-24 10:18:15.000000 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 27 Processed 6593 -8817 fd821999-6a13-407c-b6ae-bda323fc1795 homology_compute 1.0 2024-04-24 10:18:15.000000 From 
60fae37a-afe2-4bb1-9da0-321911dd2856 266 25 Processed 6623 -8818 f9634579-f88a-4892-b2c1-1762da95b69e homology_load 1.0 2024-04-24 10:18:15.000000 From 60fae37a-afe2-4bb1-9da0-321911dd2856 266 26 Processed 6623 -8819 5bce46a4-6eea-4b4d-8843-681ed932e251 homology_ftp 1.0 2024-04-24 10:18:15.000000 From 60fae37a-afe2-4bb1-9da0-321911dd2856 266 27 Processed 6623 -8820 1e3c7c73-8748-4b80-afc5-37a4045e0f71 homology_compute 1.0 2024-04-24 10:18:15.000000 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 25 Processed 6699 -8821 0c048acb-d4e2-441f-8d8c-86b83e9bf23d homology_load 1.0 2024-04-24 10:18:15.000000 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 26 Processed 6699 -8822 021b89f1-0f7f-4478-97aa-0accd0a7606e homology_ftp 1.0 2024-04-24 10:18:15.000000 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 27 Processed 6699 -8823 c929d7cd-aa29-4605-863d-60072b9eccc8 homology_compute 1.0 2024-04-24 10:18:15.000000 From f3abf167-6a8f-45cc-b753-22a955123758 387 25 Processed 6896 -8824 ecdd3f2d-74ba-4a2b-96e9-c43bce42e266 homology_load 1.0 2024-04-24 10:18:15.000000 From f3abf167-6a8f-45cc-b753-22a955123758 387 26 Processed 6896 -8825 f7834e60-f18a-4ec6-b8c3-def92135f691 homology_ftp 1.0 2024-04-24 10:18:15.000000 From f3abf167-6a8f-45cc-b753-22a955123758 387 27 Processed 6896 -8826 2f3e7f30-2c95-4bcf-a02f-3f8819ab6562 homology_compute 1.0 2024-04-24 10:18:15.000000 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 25 Processed 7069 -8827 afdc661a-4e30-411f-b453-14ecf1973672 homology_load 1.0 2024-04-24 10:18:15.000000 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 26 Processed 7069 -8828 f9fa5df3-e813-4667-aea2-2df4d67ffdb4 homology_ftp 1.0 2024-04-24 10:18:15.000000 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 27 Processed 7069 -8829 85b782ae-d1a6-4bee-8a48-3dae3c9da7aa homology_compute 1.0 2024-04-24 10:18:15.000000 From d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 25 Processed 7177 -8830 3b89fcc0-1809-4de6-ae4e-e82bf34c26bf homology_load 1.0 2024-04-24 10:18:15.000000 From 
d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 26 Processed 7177 -8831 e1a9a2ed-318a-4e57-9136-ecb33082d71c homology_ftp 1.0 2024-04-24 10:18:15.000000 From d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 27 Processed 7177 -8832 003c2932-ba1d-47b8-8f32-88b27bd79d87 homology_compute 1.0 2024-04-24 10:18:15.000000 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 25 Processed 7320 -8833 9fa37967-8ace-4f6d-925b-dce75a70ab79 homology_load 1.0 2024-04-24 10:18:15.000000 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 26 Processed 7320 -8834 94360425-5f48-4bc9-8863-2011b3e8115d homology_ftp 1.0 2024-04-24 10:18:15.000000 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 27 Processed 7320 -8835 9c874e53-bf5c-4c49-8a31-123c1025a7a6 homology_compute 1.0 2024-04-24 10:18:15.000000 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 25 Processed 7535 -8836 ff6239d9-bd90-49f9-b3a5-0cee348267b7 homology_load 1.0 2024-04-24 10:18:15.000000 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 26 Processed 7535 -8837 b7b5b776-30f0-469f-bd19-707ba2fa8f3c homology_ftp 1.0 2024-04-24 10:18:15.000000 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 27 Processed 7535 -8838 18783f74-ffd9-4e46-90bd-f18d7ca00896 homology_compute 1.0 2024-04-24 10:18:15.000000 From 254a68c7-f512-446d-a958-983a2713daf2 359 25 Processed 7603 -8839 e2ea6dca-fd18-4cc5-8120-d724bea5a5bd homology_load 1.0 2024-04-24 10:18:15.000000 From 254a68c7-f512-446d-a958-983a2713daf2 359 26 Processed 7603 -8840 86288d80-6d0e-4da8-9c3c-a2a78b740f63 homology_ftp 1.0 2024-04-24 10:18:15.000000 From 254a68c7-f512-446d-a958-983a2713daf2 359 27 Processed 7603 -8841 c40f1ff1-2da6-4569-9c95-dae534bdb59c homology_compute 1.0 2024-04-24 10:18:15.000000 From 9f2a7c92-e04a-443f-a991-1481a9466456 313 25 Processed 7785 -8842 9e45ec23-6f77-40b3-b487-717981b6789a homology_load 1.0 2024-04-24 10:18:15.000000 From 9f2a7c92-e04a-443f-a991-1481a9466456 313 26 Processed 7785 -8843 ea7e6915-8d77-4eee-bb3c-35e93a41e7e9 homology_ftp 1.0 2024-04-24 10:18:15.000000 From 
9f2a7c92-e04a-443f-a991-1481a9466456 313 27 Processed 7785 -8844 3c7d8319-a57c-447b-b47f-5a1873a0b018 homology_compute 1.0 2024-04-24 10:18:15.000000 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 25 Processed 7820 -8845 b9dac26c-ecc2-4ec4-a8ac-d240cd0b748e homology_load 1.0 2024-04-24 10:18:15.000000 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 26 Processed 7820 -8846 c2772f23-1c2f-4e55-8453-b6985475a629 homology_ftp 1.0 2024-04-24 10:18:15.000000 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 27 Processed 7820 -8847 7e892555-9d54-479e-9d2c-1fab7e45c03f homology_compute 1.0 2024-04-24 10:18:15.000000 From d78259af-f491-42de-9cbf-de744b09efee 332 25 Processed 7847 -8848 3e2a34c6-cd2b-4e1f-bac4-9acf67254bf8 homology_load 1.0 2024-04-24 10:18:15.000000 From d78259af-f491-42de-9cbf-de744b09efee 332 26 Processed 7847 -8849 4b97ed95-560c-4a4c-95c1-44b654ad0fa0 homology_ftp 1.0 2024-04-24 10:18:15.000000 From d78259af-f491-42de-9cbf-de744b09efee 332 27 Processed 7847 -8850 10e0efaa-e444-4e8d-927e-a761aa30969f homology_compute 1.0 2024-04-24 10:18:15.000000 From b6472939-9e49-4d46-b93e-304910acabf3 4352 25 Processed 8392 -8851 369f5f6b-763f-44ea-8f13-862c06461346 homology_load 1.0 2024-04-24 10:18:15.000000 From b6472939-9e49-4d46-b93e-304910acabf3 4352 26 Processed 8392 -8852 f04c6b1b-4458-4d28-aa47-b43ea12b08c9 homology_ftp 1.0 2024-04-24 10:18:15.000000 From b6472939-9e49-4d46-b93e-304910acabf3 4352 27 Processed 8392 -8853 9ca4293d-6710-4903-9fc1-8417265ce27d genebuild_compute ENS01 2024-04-24 10:18:15.000000 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 8 Processed 178 -8854 570f2d24-a2c9-4bb2-895f-8653c0def936 xrefs ENS01 2024-04-24 10:18:15.000000 From 9ca4293d-6710-4903-9fc1-8417265ce27d 97 13 Processed 8853 -8855 0f80b03b-1145-4cdd-bf93-9df96b65f160 protein_features ENS01 2024-04-24 10:18:15.000000 From 9ca4293d-6710-4903-9fc1-8417265ce27d 97 14 Processed 8853 -8856 11584801-c419-4acf-bb1f-119303afc0d4 alpha_fold ENS01 2024-04-24 10:18:15.000000 From 
9ca4293d-6710-4903-9fc1-8417265ce27d 97 15 Processed 8853 -8857 8ab46307-5632-4b68-9815-83997bf53b94 genebuild_files ENS01 2024-04-24 10:18:15.000000 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 9 Processed 178 -8858 1848bff4-5285-4c3a-abcc-62869d41dd87 blast ENS01 2024-04-24 10:18:15.000000 From 8ab46307-5632-4b68-9815-83997bf53b94 97 16 Processed 8857 -8859 1861572a-1bde-4720-bbfa-951b9d8a4456 ftp_dumps ENS01 2024-04-24 10:18:15.000000 From 8ab46307-5632-4b68-9815-83997bf53b94 97 17 Processed 8857 -8860 2109cb44-362b-4571-94da-67859d194824 genebuild_web ENS01 2024-04-24 10:18:15.000000 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 11 Processed 178 -8861 2f09c74f-c2cc-4fea-8d86-6e7461b3366d thoas_dumps ENS01 2024-04-24 10:18:15.000000 From 2109cb44-362b-4571-94da-67859d194824 97 18 Processed 8860 -8862 fdca5cd5-9a2f-4c94-bc52-fcdda4070e4e browser_files ENS01 2024-04-24 10:18:15.000000 From 2109cb44-362b-4571-94da-67859d194824 97 20 Processed 8860 -8863 a2a1216d-2cad-41d8-b115-a6711989abd5 checksums ENS01 2024-04-24 10:18:15.000000 From 2109cb44-362b-4571-94da-67859d194824 97 23 Processed 8860 -8864 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 genebuild_compute ENS01 2024-04-24 10:18:16.000000 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 8 Processed 896 -8865 f2e96294-16ce-4575-af45-f9a2c46383d7 xrefs ENS01 2024-04-24 10:18:16.000000 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 13 Processed 8864 -8866 165a40b7-2eee-4ba4-b4dc-8b6ad2402004 protein_features ENS01 2024-04-24 10:18:16.000000 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 14 Processed 8864 -8867 d3aee3e0-6b4b-43c9-8c52-19a18f91f824 alpha_fold ENS01 2024-04-24 10:18:16.000000 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 15 Processed 8864 -8868 b66be831-e87b-4bd5-ba89-fc283cc50193 genebuild_files ENS01 2024-04-24 10:18:16.000000 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 9 Processed 896 -8869 79505983-6bf5-4828-8240-4d51a36a3171 blast ENS01 2024-04-24 10:18:16.000000 From b66be831-e87b-4bd5-ba89-fc283cc50193 9 16 
Processed 8868 -8870 2b747467-a74c-488d-a900-3f6385c1f26a ftp_dumps ENS01 2024-04-24 10:18:16.000000 From b66be831-e87b-4bd5-ba89-fc283cc50193 9 17 Processed 8868 -8871 b42fdfd2-7bca-4cd1-bee4-46287457ee93 genebuild_web ENS01 2024-04-24 10:18:16.000000 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 11 Processed 896 -8872 1568bc5d-9fba-42df-87b3-52f77a8552b7 thoas_dumps ENS01 2024-04-24 10:18:16.000000 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 18 Processed 8871 -8873 cff11c40-12e8-469d-945f-c1c6ffb852f5 browser_files ENS01 2024-04-24 10:18:16.000000 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 20 Processed 8871 -8874 11e13c3e-5773-4f30-86c1-6c251f9c4c70 checksums ENS01 2024-04-24 10:18:16.000000 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 23 Processed 8871 -8875 e79f65ba-08a5-4aca-b3b9-08ff7c36ba70 evidence 1.0 2024-04-24 10:18:16.000000 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 4 Processed 1496 -8876 7d15f5de-2e98-44b0-ba74-9c70bfd450c6 short_variant 1.0 2024-04-24 10:18:16.000000 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 5 Processed 1496 -8877 ea922b02-b8fe-4f7c-ac4d-a133acc5f532 browser_files 1.0 2024-04-24 10:18:16.000000 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 31 Processed 1496 -8878 7a89be81-4103-4eb8-98b2-23e96e0c4f76 genebuild_compute EXT01 2024-04-24 10:18:16.000000 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 8 Released 2 -8879 20c06eab-391d-4b06-943c-0754f0fef146 xrefs EXT01 2024-04-24 10:18:16.000000 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 13 Released 8878 -8880 a29813e0-c950-40fc-b970-a360a10a15b6 protein_features EXT01 2024-04-24 10:18:16.000000 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 14 Released 8878 -8881 5c0b59d2-faf4-4297-b10d-c304e1f55998 alpha_fold EXT01 2024-04-24 10:18:16.000000 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 15 Released 8878 -8882 8d33dbd0-93d9-4279-bdfe-21f756afc898 genebuild_files EXT01 2024-04-24 10:18:16.000000 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 9 Released 2 -8883 
7e0ec09a-0ed9-461f-abb4-bb8de9f9b842 blast EXT01 2024-04-24 10:18:16.000000 From 8d33dbd0-93d9-4279-bdfe-21f756afc898 1 16 Released 8882 -8884 551593dc-42ad-45ec-8311-c052330feaac ftp_dumps EXT01 2024-04-24 10:18:16.000000 From 8d33dbd0-93d9-4279-bdfe-21f756afc898 1 17 Released 8882 -8885 7b5a6b07-d345-479f-95a0-c9a6712eb747 genebuild_web EXT01 2024-04-24 10:18:16.000000 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 11 Released 2 -8886 711ea653-09ba-47da-b6ff-585c25548546 thoas_dumps EXT01 2024-04-24 10:18:16.000000 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 18 Released 8885 -8887 1f7df867-7a26-4b59-98cb-866e63a215f9 browser_files EXT01 2024-04-24 10:18:16.000000 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 20 Released 8885 -8888 34d43bd3-3061-43ac-b0ab-7eb4d7edd3b3 checksums EXT01 2024-04-24 10:18:16.000000 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 23 Released 8885 -8889 fd8f0c4f-59c6-4d40-8dc9-1784f312b935 genebuild_web EXT01 2024-04-24 10:18:16.000000 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 12 Released 2 -8890 2b3158ad-aedc-464d-bad1-7dc448a1623a thoas_load EXT01 2024-04-24 10:18:16.000000 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 19 Released 8889 -8891 62b23f57-bc61-4ade-aa1d-77ecf7f8b18c genebuild_track EXT01 2024-04-24 10:18:16.000000 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 21 Released 8889 -8892 620df1db-d908-4406-9f1d-e97b11c798c7 refget_load EXT01 2024-04-24 10:18:16.000000 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 24 Released 8889 -8893 35485780-095a-44ad-a85b-ed37aff1f5ac genebuild_compute EXT01 2024-04-24 10:18:16.000000 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 8 Released 24 -8894 7760299d-bc48-4424-82ba-d069153212a1 xrefs EXT01 2024-04-24 10:18:16.000000 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 13 Released 8893 -8895 95b0c4a8-15fe-4d7e-b07a-3ebc08a95ddf protein_features EXT01 2024-04-24 10:18:16.000000 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 14 Released 8893 -8896 256fe2d6-3e6f-4c89-9d46-1616de1bac53 alpha_fold EXT01 2024-04-24 
10:18:16.000000 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 15 Released 8893 -8897 273822b7-89dd-4eef-a0d7-c6aae2322939 genebuild_files EXT01 2024-04-24 10:18:16.000000 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 9 Released 24 -8898 c090e67c-df71-401f-a90c-1fca8aee34fe blast EXT01 2024-04-24 10:18:16.000000 From 273822b7-89dd-4eef-a0d7-c6aae2322939 14 16 Released 8897 -8899 c51e1970-4e48-41cc-8955-be9172cf5f23 ftp_dumps EXT01 2024-04-24 10:18:16.000000 From 273822b7-89dd-4eef-a0d7-c6aae2322939 14 17 Released 8897 -8900 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 genebuild_web EXT01 2024-04-24 10:18:16.000000 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 11 Released 24 -8901 d5e0ebee-7117-494d-bc1b-6d2c70d9491c thoas_dumps EXT01 2024-04-24 10:18:16.000000 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 18 Released 8900 -8902 6f8f8f0b-e755-45cc-97eb-d9c182e873b0 browser_files EXT01 2024-04-24 10:18:16.000000 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 20 Released 8900 -8903 ff6ccd65-e644-42e9-8f38-04a8f253bef9 checksums EXT01 2024-04-24 10:18:16.000000 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 23 Released 8900 -8904 8945cfec-17e5-48af-83f4-79907740fddd genebuild_web EXT01 2024-04-24 10:18:16.000000 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 12 Released 24 -8905 2bdb1dcf-b45b-4d91-82ba-d1c9137ae7b9 thoas_load EXT01 2024-04-24 10:18:16.000000 From 8945cfec-17e5-48af-83f4-79907740fddd 14 19 Released 8904 -8906 a88f003b-d5bf-479f-aeeb-4696a3de728a genebuild_track EXT01 2024-04-24 10:18:16.000000 From 8945cfec-17e5-48af-83f4-79907740fddd 14 21 Released 8904 -8907 fce24263-d668-4daa-9eac-27abdebf3a90 refget_load EXT01 2024-04-24 10:18:16.000000 From 8945cfec-17e5-48af-83f4-79907740fddd 14 24 Released 8904 -8908 07f1ea6a-bc29-4426-ab28-db9e8df67135 genebuild_compute GENCODE19 2024-04-24 10:18:16.000000 From e456d1c2-eea2-40f1-83ee-31912905b695 40 8 Released 62 -8909 50c5df76-ad83-4cdd-822b-59a0a5a5caaa xrefs GENCODE19 2024-04-24 10:18:16.000000 From 
07f1ea6a-bc29-4426-ab28-db9e8df67135 40 13 Released 8908 -8910 f45160df-1af8-4bb4-b52b-099c9f5ce005 protein_features GENCODE19 2024-04-24 10:18:16.000000 From 07f1ea6a-bc29-4426-ab28-db9e8df67135 40 14 Released 8908 -8911 9bb457f3-a4c6-43ea-a5d8-df8193e33e2b alpha_fold GENCODE19 2024-04-24 10:18:16.000000 From 07f1ea6a-bc29-4426-ab28-db9e8df67135 40 15 Released 8908 -8912 1cd2a36c-9459-4aae-add4-594bdf9570ae genebuild_files GENCODE19 2024-04-24 10:18:16.000000 From e456d1c2-eea2-40f1-83ee-31912905b695 40 9 Released 62 -8913 f3ac13c6-0943-45e5-b553-6e6ecd5febb0 blast GENCODE19 2024-04-24 10:18:16.000000 From 1cd2a36c-9459-4aae-add4-594bdf9570ae 40 16 Released 8912 -8914 f817d89d-e6c7-474f-b585-a816c9a19926 ftp_dumps GENCODE19 2024-04-24 10:18:16.000000 From 1cd2a36c-9459-4aae-add4-594bdf9570ae 40 17 Released 8912 -8915 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 genebuild_web GENCODE19 2024-04-24 10:18:16.000000 From e456d1c2-eea2-40f1-83ee-31912905b695 40 11 Released 62 -8916 e055d56a-3878-4ccb-ac8b-56748d103fbd thoas_dumps GENCODE19 2024-04-24 10:18:16.000000 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 18 Released 8915 -8917 9c4831c4-8ee3-4058-8325-54a1d642e0a5 browser_files GENCODE19 2024-04-24 10:18:16.000000 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 20 Released 8915 -8918 8aa89c54-db39-4f69-9c23-78a2f7077548 checksums GENCODE19 2024-04-24 10:18:16.000000 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 23 Released 8915 -8919 c0993f4c-eda0-40e3-807d-ad7ea361e285 genebuild_web GENCODE19 2024-04-24 10:18:16.000000 From e456d1c2-eea2-40f1-83ee-31912905b695 40 12 Released 62 -8920 1bf39fbc-6863-4c23-8960-975a027556e6 thoas_load GENCODE19 2024-04-24 10:18:16.000000 From c0993f4c-eda0-40e3-807d-ad7ea361e285 40 19 Released 8919 -8921 3c1f9d35-8013-40ff-98b5-b62c764f284f genebuild_track GENCODE19 2024-04-24 10:18:16.000000 From c0993f4c-eda0-40e3-807d-ad7ea361e285 40 21 Released 8919 -8922 36d517a8-f972-4350-a44c-88d04d44286f refget_load GENCODE19 2024-04-24 10:18:16.000000 
From c0993f4c-eda0-40e3-807d-ad7ea361e285 40 24 Released 8919 -8923 9687952e-9dc8-4240-aece-a968dadbe909 genebuild_compute EXT01 2024-04-24 10:18:17.000000 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 8 Released 148 -8924 3ed69d61-78ad-4cd0-9244-8337c6896de7 xrefs EXT01 2024-04-24 10:18:17.000000 From 9687952e-9dc8-4240-aece-a968dadbe909 79 13 Released 8923 -8925 90cbc64d-d88f-4bdf-ac6c-99e0837f4253 protein_features EXT01 2024-04-24 10:18:17.000000 From 9687952e-9dc8-4240-aece-a968dadbe909 79 14 Released 8923 -8926 a70bc6ec-1678-487e-89a9-c39b338bc624 alpha_fold EXT01 2024-04-24 10:18:17.000000 From 9687952e-9dc8-4240-aece-a968dadbe909 79 15 Released 8923 -8927 704e4912-c270-4647-99bb-e8789d092949 genebuild_files EXT01 2024-04-24 10:18:17.000000 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 9 Released 148 -8928 b90ce8e9-0ddd-4591-b118-ecce2ab04b37 blast EXT01 2024-04-24 10:18:17.000000 From 704e4912-c270-4647-99bb-e8789d092949 79 16 Released 8927 -8929 935a9cee-4712-47ea-8f56-5aacf09b3883 ftp_dumps EXT01 2024-04-24 10:18:17.000000 From 704e4912-c270-4647-99bb-e8789d092949 79 17 Released 8927 -8930 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 genebuild_web EXT01 2024-04-24 10:18:17.000000 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 11 Released 148 -8931 b4f77b8b-00e0-4977-9c75-5c2d65a07f0d thoas_dumps EXT01 2024-04-24 10:18:17.000000 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 18 Released 8930 -8932 c82b305f-f783-40ab-af1a-8c2375e8816a browser_files EXT01 2024-04-24 10:18:17.000000 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 20 Released 8930 -8933 fcd75ab2-a057-4d7d-ad37-b7604a7d0764 checksums EXT01 2024-04-24 10:18:17.000000 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 23 Released 8930 -8934 a81f1f36-c2e4-498e-ba53-8c8a28759a4c genebuild_web EXT01 2024-04-24 10:18:17.000000 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 12 Released 148 -8935 56fcabc6-a26f-4007-aef7-28440b60a990 thoas_load EXT01 2024-04-24 10:18:17.000000 From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 19 
Released 8934 -8936 f9ef4142-f4c9-4def-84af-c9480934d408 genebuild_track EXT01 2024-04-24 10:18:17.000000 From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 21 Released 8934 -8937 329e59f5-ff09-4e8f-8398-c5bb37d25a16 refget_load EXT01 2024-04-24 10:18:17.000000 From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 24 Released 8934 -8938 d732f47d-4783-4cf3-80ee-566347f27fe5 genebuild_compute GENCODE44 2024-04-24 10:18:17.000000 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 8 Released 172 -8939 f0e56fd2-53a7-4a5d-8c6e-a0e425659e2d xrefs GENCODE44 2024-04-24 10:18:17.000000 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 13 Released 8938 -8940 9e50213a-a3dc-4d86-bff2-2c607ea97be6 protein_features GENCODE44 2024-04-24 10:18:17.000000 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 14 Released 8938 -8941 c683f337-fe1a-4080-8fa6-b2f5921c82f5 alpha_fold GENCODE44 2024-04-24 10:18:17.000000 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 15 Released 8938 -8942 a26a6832-2081-4f10-bdec-9034f9682f88 genebuild_files GENCODE44 2024-04-24 10:18:17.000000 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 9 Released 172 -8943 1bbc28e3-55b6-42aa-a806-231f4d645d79 blast GENCODE44 2024-04-24 10:18:17.000000 From a26a6832-2081-4f10-bdec-9034f9682f88 92 16 Released 8942 -8944 6353961a-be42-43f6-be0e-98dff9a0e6fa ftp_dumps GENCODE44 2024-04-24 10:18:17.000000 From a26a6832-2081-4f10-bdec-9034f9682f88 92 17 Released 8942 -8945 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 genebuild_web GENCODE44 2024-04-24 10:18:17.000000 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 11 Released 172 -8946 4f8b970e-054f-48a2-82ee-f638d42cb88c thoas_dumps GENCODE44 2024-04-24 10:18:17.000000 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 18 Released 8945 -8947 893ab5b4-cf72-4ac8-93a9-6b263ef24fec browser_files GENCODE44 2024-04-24 10:18:17.000000 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 20 Released 8945 -8948 0d2e1d80-ec5d-4c83-a777-d0e12dd3a20d checksums GENCODE44 2024-04-24 10:18:17.000000 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 23 
Released 8945 -8949 d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 genebuild_web GENCODE44 2024-04-24 10:18:17.000000 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 12 Released 172 -8950 7cc832cd-2a22-4326-95ec-3b440ad218d0 thoas_load GENCODE44 2024-04-24 10:18:17.000000 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 19 Released 8949 -8951 743e6c8d-5fe5-4417-9673-d807b4b494e8 genebuild_track GENCODE44 2024-04-24 10:18:17.000000 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 21 Released 8949 -8952 775b793f-124c-4b97-8734-cf38aa2e66d4 refget_load GENCODE44 2024-04-24 10:18:17.000000 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 24 Released 8949 -8953 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c genebuild_compute EXT01 2024-04-24 10:18:17.000000 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 8 Released 402 -8954 7e303177-8107-4467-affd-60fcb3cb9fe9 xrefs EXT01 2024-04-24 10:18:17.000000 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 13 Released 8953 -8955 5ea5d208-db3d-409a-ad3b-90fc25357975 protein_features EXT01 2024-04-24 10:18:17.000000 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 14 Released 8953 -8956 bde17d27-1505-4d55-81e1-589b2c160758 alpha_fold EXT01 2024-04-24 10:18:17.000000 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 15 Released 8953 -8957 a312a91d-c738-44d2-9117-3289689074bf genebuild_files EXT01 2024-04-24 10:18:17.000000 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 9 Released 402 -8958 cabdf704-d7a7-43c6-91b1-ef13643fe743 blast EXT01 2024-04-24 10:18:17.000000 From a312a91d-c738-44d2-9117-3289689074bf 214 16 Released 8957 -8959 35242e52-ac0a-4232-a04e-602712188981 ftp_dumps EXT01 2024-04-24 10:18:17.000000 From a312a91d-c738-44d2-9117-3289689074bf 214 17 Released 8957 -8960 c679cde3-49b9-4b2e-a415-f5e41a2584ad genebuild_web EXT01 2024-04-24 10:18:17.000000 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 11 Released 402 -8961 15392806-6489-4ca8-8dfe-4c068b8cbfc5 thoas_dumps EXT01 2024-04-24 10:18:17.000000 From c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 18 Released 8960 -8962 
30b00998-cf48-4e19-87a5-0d77a7d8eab8 browser_files EXT01 2024-04-24 10:18:17.000000 From c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 20 Released 8960 -8963 7405d0be-af0e-4a25-909d-79804221ab66 checksums EXT01 2024-04-24 10:18:17.000000 From c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 23 Released 8960 -8964 63902fec-bfaa-49bd-90c7-38910ad11921 genebuild_web EXT01 2024-04-24 10:18:17.000000 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 12 Released 402 -8965 84157ee7-4328-4489-a235-062c76cc6bc9 thoas_load EXT01 2024-04-24 10:18:17.000000 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 19 Released 8964 -8966 b1b03468-d47e-4033-b164-e24a77c1419a genebuild_track EXT01 2024-04-24 10:18:17.000000 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 21 Released 8964 -8967 6f136ae5-07b4-457c-9452-b7272490214a refget_load EXT01 2024-04-24 10:18:17.000000 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 24 Released 8964 -8968 0c0887c2-839a-4df9-b9dc-85c7f1605e16 genebuild_compute EXT01 2024-04-24 10:18:17.000000 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 8 Released 406 -8969 8543b0f0-0798-4af5-9ee2-68ab081ce2f9 xrefs EXT01 2024-04-24 10:18:17.000000 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 13 Released 8968 -8970 915f67e9-46aa-419c-b1f1-3e3fe8d37c9f protein_features EXT01 2024-04-24 10:18:17.000000 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 14 Released 8968 -8971 9429df49-33dd-4c4b-bb1e-a086f9be1311 alpha_fold EXT01 2024-04-24 10:18:17.000000 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 15 Released 8968 -8972 b3979b48-55ad-42a3-9f03-b0e6eb1b8408 genebuild_files EXT01 2024-04-24 10:18:17.000000 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 9 Released 406 -8973 4d258abd-6847-486c-9196-bb4da2a13cd0 blast EXT01 2024-04-24 10:18:17.000000 From b3979b48-55ad-42a3-9f03-b0e6eb1b8408 217 16 Released 8972 -8974 fe818538-d329-4e46-b311-71a13f546eb7 ftp_dumps EXT01 2024-04-24 10:18:17.000000 From b3979b48-55ad-42a3-9f03-b0e6eb1b8408 217 17 Released 8972 -8975 537ac1ca-5839-4d21-8f66-815253a29de8 
genebuild_web EXT01 2024-04-24 10:18:17.000000 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 11 Released 406 -8976 e6c747ba-6515-447c-9dec-223a5e7b5ab2 thoas_dumps EXT01 2024-04-24 10:18:17.000000 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 18 Released 8975 -8977 2cd06f2e-8ebb-400a-9cd2-6a313dd67b79 browser_files EXT01 2024-04-24 10:18:17.000000 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 20 Released 8975 -8978 e3282d40-5aec-4970-924c-20fb943324fe checksums EXT01 2024-04-24 10:18:18.000000 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 23 Released 8975 -8979 8d7811aa-63db-480f-b2cd-28fbd0e414e7 genebuild_web EXT01 2024-04-24 10:18:18.000000 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 12 Released 406 -8980 bb86329a-3ff5-4aa1-bccb-fa10866c0400 thoas_load EXT01 2024-04-24 10:18:18.000000 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 19 Released 8979 -8981 58a02fb2-387a-4d93-9798-b8a2ec2a990b genebuild_track EXT01 2024-04-24 10:18:18.000000 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 21 Released 8979 -8982 a9e9630d-7e6c-4a03-8b76-3eea9d58ddd5 refget_load EXT01 2024-04-24 10:18:18.000000 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 24 Released 8979 -8983 b847fdd0-205d-4010-a216-a150eb9dcf62 evidence 1.0 2024-04-24 10:18:18.000000 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 4 Released 1391 -8984 d6a11f1e-41fd-409b-a42c-6bb5eed4536d short_variant 1.0 2024-04-24 10:18:18.000000 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 5 Released 1391 -8985 8ba10f26-694b-4e8d-9888-aaa860581af7 variation_ftp 1.0 2024-04-24 10:18:18.000000 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 29 Released 1391 -8986 62bad9a6-2406-4a93-bc6d-1a3c871fce94 browser_files 1.0 2024-04-24 10:18:18.000000 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 31 Released 1391 -8987 48d069c5-f099-414d-bac1-b682f78a1fde variation_track 1.0 2024-04-24 10:18:18.000000 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 32 Released 1391 -8988 07a02fee-c19f-4c11-b35a-2e2b51d67378 evidence 1.0 2024-04-24 
10:18:18.000000 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 4 Released 1392 -8989 cc377779-9dc4-4cfb-ba14-81531a7fb69b short_variant 1.0 2024-04-24 10:18:18.000000 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 5 Released 1392 -8990 d2261995-a526-4728-a6a8-568b17c217d3 variation_ftp 1.0 2024-04-24 10:18:18.000000 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 29 Released 1392 -8991 97b81726-34e3-4dbc-976a-ac2e9fa64c17 browser_files 1.0 2024-04-24 10:18:18.000000 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 31 Released 1392 -8992 49d855db-8570-4a4d-9290-d2fc3b1ba02d variation_track 1.0 2024-04-24 10:18:18.000000 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 32 Released 1392 -8993 a487cac9-c58c-48d9-a0b3-2808346ce541 evidence 1.0 2024-04-24 10:18:18.000000 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 4 Released 1528 -8994 c5dec575-8720-46f3-ae5d-a86f22760b30 short_variant 1.0 2024-04-24 10:18:18.000000 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 5 Released 1528 -8995 05e09f88-0d11-47f8-8db0-92714ce6ba42 variation_ftp 1.0 2024-04-24 10:18:18.000000 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 29 Released 1528 -8996 96da2064-5735-4a91-bd1f-dbeff5548b50 browser_files 1.0 2024-04-24 10:18:18.000000 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 31 Released 1528 -8997 c50d02a2-16a0-44a2-be1a-32b63ab04deb variation_track 1.0 2024-04-24 10:18:18.000000 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 32 Released 1528 -8998 c1829f04-cccd-436d-9f0a-1d82ed117064 evidence 1.0 2024-04-24 10:18:18.000000 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 4 Released 1537 -8999 9bf0c2f7-2dc8-4f27-b578-a1cb277e1a63 short_variant 1.0 2024-04-24 10:18:18.000000 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 5 Released 1537 -9000 f3206998-32e5-465b-8b76-d21e3c24bb18 variation_ftp 1.0 2024-04-24 10:18:18.000000 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 29 Released 1537 -9001 dd7a4ab7-d890-488e-b014-b6e9eacf8a3a browser_files 1.0 2024-04-24 10:18:18.000000 From 
ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 31 Released 1537 -9002 263fda44-a84a-4879-912e-4de5e75be0ea variation_track 1.0 2024-04-24 10:18:18.000000 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 32 Released 1537 -9003 963a3baf-5f86-4372-b0ce-79726329ce59 evidence 1.0 2024-04-24 10:18:18.000000 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 4 Released 1544 -9004 cdabf7a3-e5bf-4afa-ae41-80eeca2ec76d short_variant 1.0 2024-04-24 10:18:18.000000 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 5 Released 1544 -9005 c708b53e-6fbb-49d9-b9c3-a8a09c3b7f0d variation_ftp 1.0 2024-04-24 10:18:18.000000 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 29 Released 1544 -9006 029df488-7091-47ed-9db7-e1abf23cc429 browser_files 1.0 2024-04-24 10:18:18.000000 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 31 Released 1544 -9007 b4863deb-6e3f-4f56-9904-f64cb1783409 variation_track 1.0 2024-04-24 10:18:18.000000 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 32 Released 1544 -9008 9feb9d70-8966-49f4-a385-3777b66ca2a2 homology_compute 1.0 2024-04-24 10:18:18.000000 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 25 Released 2291 -9009 da2808ff-be03-4b52-aeb8-5415e509a8b1 homology_load 1.0 2024-04-24 10:18:18.000000 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 26 Released 2291 -9010 9aaf2c4c-f026-4c6c-8b65-c475e3be683a homology_ftp 1.0 2024-04-24 10:18:18.000000 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 27 Released 2291 -9011 049cc616-bf6e-4f21-b3b3-6ba0672e79ac homology_compute 1.0 2024-04-24 10:18:18.000000 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 25 Released 2348 -9012 0d4335ae-fcd5-4ac4-a1a7-d766c4dad6bc homology_load 1.0 2024-04-24 10:18:18.000000 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 26 Released 2348 -9013 7642b849-ba92-4f47-960c-9dc07b2488c8 homology_ftp 1.0 2024-04-24 10:18:18.000000 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 27 Released 2348 -9014 b2f53226-7a8f-41cc-bef3-f1dc6d7324ad homology_compute 1.0 2024-04-24 10:18:18.000000 From 
caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 25 Released 2357 -9015 d174a9a1-39df-4e81-94bd-98db30505730 homology_load 1.0 2024-04-24 10:18:18.000000 From caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 26 Released 2357 -9016 23f92c2d-83a6-43de-87c7-a83aad6406a5 homology_ftp 1.0 2024-04-24 10:18:18.000000 From caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 27 Released 2357 -9017 41b03ede-f8f2-4c4c-b1f7-86d506798835 homology_compute 1.0 2024-04-24 10:18:18.000000 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 359 25 Released 2384 -9018 1e809aac-bfe5-4a89-9472-c2f183205f3b homology_load 1.0 2024-04-24 10:18:18.000000 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 359 26 Released 2384 -9019 8c572ee1-140c-43f4-9c6f-287662793018 homology_ftp 1.0 2024-04-24 10:18:18.000000 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 359 27 Released 2384 -9020 70452894-16f7-4b8c-a780-1dd869bec2fa homology_compute 1.0 2024-04-24 10:18:18.000000 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 25 Released 2394 -9021 ba8bd474-977f-4c52-88b4-7cd99f288f2c homology_load 1.0 2024-04-24 10:18:18.000000 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 26 Released 2394 -9022 8b148701-df21-4cdc-8123-66ba2a7a59e0 homology_ftp 1.0 2024-04-24 10:18:18.000000 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 27 Released 2394 -9023 f1ba169e-f9a4-4a01-80cc-f2e2085a07cb homology_compute 1.0 2024-04-24 10:18:18.000000 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 25 Released 2449 -9024 794e6ba8-771e-4a7e-b851-f1b48f20806f homology_load 1.0 2024-04-24 10:18:19.000000 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 26 Released 2449 -9025 ae16f773-4395-4908-82f1-d5517b75ace5 homology_ftp 1.0 2024-04-24 10:18:19.000000 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 27 Released 2449 -9026 d84af6e2-313c-4930-bbac-8b74def7c6b7 homology_compute 1.0 2024-04-24 10:18:19.000000 From 58df568e-48c1-4a3b-838b-448540392f9c 433 25 Released 2458 -9027 b6156c27-8aef-4172-b150-6d6b27b0f4c6 homology_load 1.0 2024-04-24 10:18:19.000000 From 
58df568e-48c1-4a3b-838b-448540392f9c 433 26 Released 2458 -9028 73f4fa74-7d7c-4bb4-a71e-1194ef1c244a homology_ftp 1.0 2024-04-24 10:18:19.000000 From 58df568e-48c1-4a3b-838b-448540392f9c 433 27 Released 2458 -9029 078ff308-f035-4c75-aa83-66e0697da057 browser_files 1.0 2024-04-24 10:18:19.000000 From 679d6452-799c-4a2f-8906-0db6c639e498 670 33 Released 2518 -9030 9b6fc878-fa10-49b6-bd99-98ade5e0252f regulation_track 1.0 2024-04-24 10:18:19.000000 From 679d6452-799c-4a2f-8906-0db6c639e498 670 34 Released 2518 -9031 852986f9-e63a-44b7-b182-7182d7070e1f regulation_ftp 1.0 2024-04-24 10:18:19.000000 From 679d6452-799c-4a2f-8906-0db6c639e498 670 35 Released 2518 -9032 7573b939-da2c-4997-8002-9da717ba79d2 genebuild_compute ENS01 2024-04-24 16:07:22.000000 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 8 Submitted 38 -9033 7bb8919c-d9e0-4eca-9a49-7a6d9e311c8d xrefs ENS01 2024-04-24 16:07:22.000000 From 7573b939-da2c-4997-8002-9da717ba79d2 18 13 Submitted 9032 -9034 a6a43d07-4ddd-4935-96f3-137882be6b5f protein_features ENS01 2024-04-24 16:07:22.000000 From 7573b939-da2c-4997-8002-9da717ba79d2 18 14 Submitted 9032 -9035 3286e886-cdde-45e2-a92c-2a5b7a43744b alpha_fold ENS01 2024-04-24 16:07:22.000000 From 7573b939-da2c-4997-8002-9da717ba79d2 18 15 Submitted 9032 -9036 a41c7eb3-8dd9-4449-bef3-8a2798d324c9 genebuild_files ENS01 2024-04-24 16:07:22.000000 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 9 Submitted 38 -9037 384e30bb-1940-475b-a7f1-94c3b5fa6251 blast ENS01 2024-04-24 16:07:22.000000 From a41c7eb3-8dd9-4449-bef3-8a2798d324c9 18 16 Submitted 9036 -9038 b0d8755a-d01b-4910-b84c-0e15ef1293ba ftp_dumps ENS01 2024-04-24 16:07:22.000000 From a41c7eb3-8dd9-4449-bef3-8a2798d324c9 18 17 Submitted 9036 -9039 3666e777-8cb5-420e-8f45-7469253db5f6 genebuild_web ENS01 2024-04-24 16:07:22.000000 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 11 Submitted 38 -9040 282e982f-493b-4f13-a927-3f9e3dc9a8a8 thoas_dumps ENS01 2024-04-24 16:07:22.000000 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 
18 Submitted 9039 -9041 f39ac854-157a-48a8-8b81-4345391c59c3 browser_files ENS01 2024-04-24 16:07:22.000000 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 20 Submitted 9039 -9042 6a86ad7d-67d8-4c0b-a504-966225539fc0 checksums ENS01 2024-04-24 16:07:22.000000 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 23 Submitted 9039 -9043 a128c1b9-6f98-40cf-a3ae-321d5e4e1106 genebuild_compute ENS01 2024-04-24 16:07:22.000000 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 8 Processed 184 -9044 65bacf69-42d4-439c-a436-f76208677771 xrefs ENS01 2024-04-24 16:07:22.000000 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 13 Processed 9043 -9045 d4716792-c4af-4ec9-a14c-220f4768ed88 protein_features ENS01 2024-04-24 16:07:22.000000 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 14 Processed 9043 -9046 17bc6764-fc30-4fe3-8cfc-18d10e5357d3 alpha_fold ENS01 2024-04-24 16:07:22.000000 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 15 Processed 9043 -9047 7e8844b3-733d-4962-a144-70e8cc69a3a6 genebuild_files ENS01 2024-04-24 16:07:22.000000 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 9 Processed 184 -9048 bec4dc62-aac5-4993-98ef-da92da3c3975 blast ENS01 2024-04-24 16:07:22.000000 From 7e8844b3-733d-4962-a144-70e8cc69a3a6 100 16 Processed 9047 -9049 a187630f-56ea-4012-b10f-96d4eee7e280 ftp_dumps ENS01 2024-04-24 16:07:22.000000 From 7e8844b3-733d-4962-a144-70e8cc69a3a6 100 17 Processed 9047 -9050 372c9ef4-5068-491e-bc9d-f173de3779d4 genebuild_web ENS01 2024-04-24 16:07:22.000000 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 11 Processed 184 -9051 1f9b9cf6-af64-49da-9f6a-e91fcfe3748b thoas_dumps ENS01 2024-04-24 16:07:22.000000 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 18 Processed 9050 -9052 14224fd8-39fc-4ce4-955a-9ceb53b7fe17 browser_files ENS01 2024-04-24 16:07:22.000000 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 20 Processed 9050 -9053 a2d8c490-6152-4d44-8cd6-6318be80c6d0 checksums ENS01 2024-04-24 16:07:22.000000 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 23 Processed 9050 -9054 
569f3264-1e67-474a-bcca-d1f971bdfb6d genebuild_compute ENS01 2024-04-24 16:07:22.000000 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 8 Processed 338 -9055 2f75afb7-07b9-4f26-914b-447609ae9661 xrefs ENS01 2024-04-24 16:07:22.000000 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 13 Processed 9054 -9056 4030627d-9f93-418b-b162-ef4c4ea7187c protein_features ENS01 2024-04-24 16:07:22.000000 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 14 Processed 9054 -9057 03698609-b0cc-4ebc-ba4e-9c1839c07375 alpha_fold ENS01 2024-04-24 16:07:22.000000 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 15 Processed 9054 -9058 5d221954-3022-434b-8167-4837bcb83cdf genebuild_files ENS01 2024-04-24 16:07:22.000000 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 9 Processed 338 -9059 0a2ed457-8cb8-4c32-9670-f2d29accf899 blast ENS01 2024-04-24 16:07:22.000000 From 5d221954-3022-434b-8167-4837bcb83cdf 179 16 Processed 9058 -9060 b8aa05bd-9375-49aa-bdd4-2cd81dded467 ftp_dumps ENS01 2024-04-24 16:07:22.000000 From 5d221954-3022-434b-8167-4837bcb83cdf 179 17 Processed 9058 -9061 032c5450-032b-4bd2-91e5-8b00482bb51f genebuild_web ENS01 2024-04-24 16:07:22.000000 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 11 Processed 338 -9062 900b59d2-8ac7-4e41-a588-a4314dadfe9d thoas_dumps ENS01 2024-04-24 16:07:22.000000 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 18 Processed 9061 -9063 c813b3e5-9756-4431-86b9-1d78e3242ffc browser_files ENS01 2024-04-24 16:07:22.000000 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 20 Processed 9061 -9064 6677ee8d-e814-4991-87b1-967d752652f0 checksums ENS01 2024-04-24 16:07:22.000000 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 23 Processed 9061 -9065 b60e4ed3-9260-42fd-bb44-648e2240c0fd homology_compute 1.0 2024-04-24 16:07:22.000000 From f2734f34-36a0-4594-871d-f7f6d317d05a 429 25 Submitted 2454 -9066 ef13256d-516a-475d-9769-8ec0b487c39a homology_compute 1.0 2024-04-24 16:07:22.000000 From f32b7f9a-97fd-41cd-86be-a5fb5becd335 469 25 Processed 2494 -9067 
6246ba7c-ae8c-4e66-b696-0aaceb586d75 homology_compute 1.0 2024-04-24 16:07:22.000000 From f93d21ca-9a24-4c31-ae11-b0f8d3deab6d 423 25 Submitted 6849 -9068 23bac8a9-553f-4e00-85f3-2844d6634364 homology_compute 1.0 2024-04-24 16:07:22.000000 From 5b618784-a5ff-46cc-8102-b082ffb6e447 368 25 Submitted 8130 -9069 dc06cef3-40c1-4924-82aa-d95003b033d0 homology_compute 1.0 2024-04-24 16:07:22.000000 From a5bf42be-63c1-4616-9af1-bc03aea92643 443 25 Submitted 8661 +8662 af8eee44-ca56-4baf-a5f1-ad60d1165f3a genebuild_compute ENS01 2024-04-24 10:18:12 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 8 Released 348 +8663 a0fef323-23b9-4d4c-87b3-42f290dffbc7 xrefs ENS01 2024-04-24 10:18:12 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 13 Released 8662 +8664 5d12b439-f994-408b-a7cc-88a0ce2a1c5e protein_features ENS01 2024-04-24 10:18:12 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 14 Released 8662 +8665 fd7c81b9-bd96-4074-a78f-ce86059d3a55 alpha_fold ENS01 2024-04-24 10:18:12 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 15 Released 8662 +8666 e4630a33-1d85-4a93-9c3d-ba23f531e900 genebuild_files ENS01 2024-04-24 10:18:12 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 9 Released 348 +8667 5dc9ebba-a6f0-4380-9f9b-5735855c8c0b blast ENS01 2024-04-24 10:18:12 From e4630a33-1d85-4a93-9c3d-ba23f531e900 185 16 Released 8666 +8668 12304159-93ac-4597-bbfb-fc487a580bd2 ftp_dumps ENS01 2024-04-24 10:18:12 From e4630a33-1d85-4a93-9c3d-ba23f531e900 185 17 Released 8666 +8669 aaf2c600-821f-4ade-a3e7-fde21c333060 genebuild_web ENS01 2024-04-24 10:18:12 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 11 Released 348 +8670 97e9f169-4ba2-4d44-b958-7fc3233c2c24 thoas_dumps ENS01 2024-04-24 10:18:12 From aaf2c600-821f-4ade-a3e7-fde21c333060 185 18 Released 8669 +8671 45c72ec3-7b5e-4b5e-83f8-1fc5790b1ad4 browser_files ENS01 2024-04-24 10:18:12 From aaf2c600-821f-4ade-a3e7-fde21c333060 185 20 Released 8669 +8672 7a33c596-7883-4638-86d4-9aa4ac266110 checksums ENS01 2024-04-24 10:18:12 From 
aaf2c600-821f-4ade-a3e7-fde21c333060 185 23 Released 8669 +8673 d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c genebuild_web ENS01 2024-04-24 10:18:12 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 12 Released 348 +8674 1ff09568-0971-4907-b023-2e81b9d73e61 thoas_load ENS01 2024-04-24 10:18:12 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 19 Released 8673 +8675 ff3d51eb-154b-4665-887c-c406cc3bc78b genebuild_track ENS01 2024-04-24 10:18:12 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 21 Released 8673 +8676 76bc09f9-ab83-49ad-9b14-b81dd2ee5eb0 refget_load ENS01 2024-04-24 10:18:12 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 24 Released 8673 +8677 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 genebuild_compute ENS01 2024-04-24 10:18:12 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 8 Released 888 +8678 8a49f103-b405-4f54-8714-980007cfe776 xrefs ENS01 2024-04-24 10:18:12 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 13 Released 8677 +8679 129e0e2b-b778-47d5-9252-822af8adbf5b protein_features ENS01 2024-04-24 10:18:12 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 14 Released 8677 +8680 d811d1ff-8e73-4215-b622-4da5d1ae68bc alpha_fold ENS01 2024-04-24 10:18:12 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 15 Released 8677 +8681 b57a6524-00c5-423a-b569-57e2039d5f75 genebuild_files ENS01 2024-04-24 10:18:12 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 9 Released 888 +8682 90d32255-8476-4d33-8aca-cd05b65f6b6f blast ENS01 2024-04-24 10:18:12 From b57a6524-00c5-423a-b569-57e2039d5f75 4 16 Released 8681 +8683 3243f1ac-38aa-412b-9a2a-c5edf0336a2f ftp_dumps ENS01 2024-04-24 10:18:12 From b57a6524-00c5-423a-b569-57e2039d5f75 4 17 Released 8681 +8684 c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 genebuild_web ENS01 2024-04-24 10:18:12 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 11 Released 888 +8685 98cb07a1-2a1d-496a-a0a7-168662eda07b thoas_dumps ENS01 2024-04-24 10:18:12 From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 18 Released 8684 +8686 3b9fee1b-0c94-4345-9599-919ad721a7da browser_files ENS01 2024-04-24 10:18:12 
From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 20 Released 8684 +8687 8978bd71-c1b1-40b5-8628-1dd84115badd checksums ENS01 2024-04-24 10:18:12 From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 23 Released 8684 +8688 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 genebuild_web ENS01 2024-04-24 10:18:12 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 12 Released 888 +8689 53979850-c127-4a85-a680-9183978bb250 thoas_load ENS01 2024-04-24 10:18:12 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 19 Released 8688 +8690 2b8d9066-8a02-4d47-ab29-c39f43ccfc53 genebuild_track ENS01 2024-04-24 10:18:12 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 21 Released 8688 +8691 4d418be3-ea1b-4f36-afa4-c40d113b3910 refget_load ENS01 2024-04-24 10:18:12 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 24 Released 8688 +8692 3cfe16ce-8a7e-49c8-b719-2affce984771 genebuild_compute ENS01 2024-04-24 10:18:13 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 8 Released 890 +8693 f5eeacaa-8ee4-4739-8aed-a6aeaaadd13e xrefs ENS01 2024-04-24 10:18:13 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 13 Released 8692 +8694 808c43bc-7950-4e32-935b-ef13f1a869c1 protein_features ENS01 2024-04-24 10:18:13 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 14 Released 8692 +8695 7413ae88-d840-47ca-a602-713e03e6b123 alpha_fold ENS01 2024-04-24 10:18:13 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 15 Released 8692 +8696 6a5f98e4-2f2f-4c88-9172-d02dc623c42f genebuild_files ENS01 2024-04-24 10:18:13 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 9 Released 890 +8697 cd9f881d-1716-425d-9879-472193cbf337 blast ENS01 2024-04-24 10:18:13 From 6a5f98e4-2f2f-4c88-9172-d02dc623c42f 5 16 Released 8696 +8698 84d2b150-1e5b-49b2-a35c-14596d503ae0 ftp_dumps ENS01 2024-04-24 10:18:13 From 6a5f98e4-2f2f-4c88-9172-d02dc623c42f 5 17 Released 8696 +8699 5b63c887-d867-411c-b138-536ed0c430de genebuild_web ENS01 2024-04-24 10:18:13 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 11 Released 890 +8700 3a7c3603-acfa-4803-8c0c-c413501c6180 thoas_dumps ENS01 2024-04-24 10:18:13 From 
5b63c887-d867-411c-b138-536ed0c430de 5 18 Released 8699 +8701 6a52d021-242b-4e7a-84aa-b2f08d6b1e89 browser_files ENS01 2024-04-24 10:18:13 From 5b63c887-d867-411c-b138-536ed0c430de 5 20 Released 8699 +8702 30f5310e-7e78-4027-aa32-82de71946e20 checksums ENS01 2024-04-24 10:18:13 From 5b63c887-d867-411c-b138-536ed0c430de 5 23 Released 8699 +8703 f67689b2-8c52-4c3e-89da-70520e0613d8 genebuild_web ENS01 2024-04-24 10:18:13 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 12 Released 890 +8704 e4e75159-a56c-4a38-ac81-b74f7e89c022 thoas_load ENS01 2024-04-24 10:18:13 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 19 Released 8703 +8705 981eb155-b2c5-4571-955f-f2d7574ef5eb genebuild_track ENS01 2024-04-24 10:18:13 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 21 Released 8703 +8706 078f49f0-cc6e-4e4a-bfee-f8fc240a635a refget_load ENS01 2024-04-24 10:18:13 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 24 Released 8703 +8707 705c3da3-186f-42f7-bd2d-795285e9b246 evidence 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 4 Released 1475 +8708 bb98fc3a-30af-41b2-9dac-29d580b42b68 short_variant 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 5 Released 1475 +8709 4b9b9585-8570-4f26-8819-a45f92df8d23 variation_ftp 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 29 Released 1475 +8710 7ca071a6-2ab3-4535-8c52-a21cb012fe0c browser_files 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 31 Released 1475 +8711 9d5325d8-49ba-4606-aaa2-c7269a19f5f7 variation_track 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 32 Released 1475 +8712 d6d6f12a-c806-4db0-99dd-a667fbd7c191 evidence 1.0 2024-04-24 10:18:13 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 4 Released 1507 +8713 8ed8f4b5-423e-44f1-a01d-bf2c19857374 short_variant 1.0 2024-04-24 10:18:13 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 5 Released 1507 +8714 e17b4956-cf44-4ce3-bad6-c141fae5148c variation_ftp 1.0 2024-04-24 10:18:13 From 
dd28ea84-6890-4af8-bd2b-2caa9917f221 608 29 Released 1507 +8715 48876d82-b51c-41d5-818f-5af04bcf8fc3 browser_files 1.0 2024-04-24 10:18:13 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 31 Released 1507 +8716 c2dab1e0-5d72-4d50-9ec7-c8e90746ec65 variation_track 1.0 2024-04-24 10:18:13 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 32 Released 1507 +8717 6d799ad1-5fe6-477f-8ddb-6a16ab3ea33a evidence 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 4 Released 1509 +8718 c06b0534-4708-4cec-913b-8b354fda0c6b short_variant 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 5 Released 1509 +8719 fff94973-5318-4821-9afa-3fd2fc0b7a4e variation_ftp 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 29 Released 1509 +8720 b5a2b993-1252-4495-bdf3-9eae0322cf39 browser_files 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 31 Released 1509 +8721 db33d965-62b4-4d83-9738-ade69df4cff5 variation_track 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 32 Released 1509 +8722 7780e084-b3ca-4df4-ba40-5a8bfea6e9a1 homology_compute 1.0 2024-04-24 10:18:13 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 25 Released 2276 +8723 71fe75e0-50ca-4b74-a8a6-3d8d016e4227 homology_load 1.0 2024-04-24 10:18:13 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 26 Released 2276 +8724 10879879-9a3a-4dfd-b0eb-c06699f0aada homology_ftp 1.0 2024-04-24 10:18:13 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 27 Released 2276 +8725 2925c0ee-f987-4102-b792-4904c7b98d19 homology_compute 1.0 2024-04-24 10:18:13 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 383 25 Released 2408 +8726 6f0b5633-abef-4daf-be84-489a979f8b0b homology_load 1.0 2024-04-24 10:18:13 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 383 26 Released 2408 +8727 3e44f562-0cae-4165-9ef5-75fd6593d2e1 homology_ftp 1.0 2024-04-24 10:18:13 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 383 27 Released 2408 +8728 80a73415-7eda-4c22-80a1-93508c1ebc03 homology_compute 1.0 
2024-04-24 10:18:13 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 25 Released 2482 +8729 11185ed4-ea77-406e-bbbd-829601db2463 homology_load 1.0 2024-04-24 10:18:13 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 26 Released 2482 +8730 745f6eba-367b-4db9-92f7-7353603ce4ce homology_ftp 1.0 2024-04-24 10:18:13 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 27 Released 2482 +8731 1dcbaf3e-5179-434c-beae-1416149f30cf genebuild_compute ENS01 2024-04-24 10:18:13 From 53936715-1371-4343-95af-f39d06943db7 7 8 Processed 14 +8732 d340ac5b-2f9b-44d7-bab8-99ff17516053 xrefs ENS01 2024-04-24 10:18:13 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 13 Processed 8731 +8733 5a2fd22b-31ac-4e69-ad20-137b6d297cf8 protein_features ENS01 2024-04-24 10:18:13 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 14 Processed 8731 +8734 7f988fe4-f4d5-4bd7-8516-2cfc767d7ec6 alpha_fold ENS01 2024-04-24 10:18:13 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 15 Processed 8731 +8735 81bdc51c-4c4f-4e07-850b-562d0d964269 genebuild_files ENS01 2024-04-24 10:18:13 From 53936715-1371-4343-95af-f39d06943db7 7 9 Processed 14 +8736 871842a4-566d-4b44-b883-caea737dbe70 blast ENS01 2024-04-24 10:18:13 From 81bdc51c-4c4f-4e07-850b-562d0d964269 7 16 Processed 8735 +8737 5e7deba4-4293-4d6a-b954-73dfdc3be208 ftp_dumps ENS01 2024-04-24 10:18:13 From 81bdc51c-4c4f-4e07-850b-562d0d964269 7 17 Processed 8735 +8738 2563efa7-e2be-401a-a89f-79ea71c17452 genebuild_web ENS01 2024-04-24 10:18:13 From 53936715-1371-4343-95af-f39d06943db7 7 11 Processed 14 +8739 430221cd-df0d-4727-bd3f-8bdd1e69fb62 thoas_dumps ENS01 2024-04-24 10:18:13 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 18 Processed 8738 +8740 c918aa39-6dd4-4032-87da-5282e90c4142 browser_files ENS01 2024-04-24 10:18:13 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 20 Processed 8738 +8741 ce889e08-ab8b-4420-891b-9fb3ab5f4e81 checksums ENS01 2024-04-24 10:18:13 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 23 Processed 8738 +8742 96331294-fb65-457b-a3bd-5e41f1818044 genebuild_web ENS01 
2024-04-24 10:18:13 From 53936715-1371-4343-95af-f39d06943db7 7 12 Processed 14 +8743 a31eb1a1-1ebe-478e-b6a5-fd04ffbb0e3c thoas_load ENS01 2024-04-24 10:18:13 From 96331294-fb65-457b-a3bd-5e41f1818044 7 19 Processed 8742 +8744 a1fae0e7-c124-4849-bfe5-e68c583b7826 genebuild_track ENS01 2024-04-24 10:18:13 From 96331294-fb65-457b-a3bd-5e41f1818044 7 21 Processed 8742 +8745 145ad879-9c5e-4833-9645-ec0e9fb35079 refget_load ENS01 2024-04-24 10:18:13 From 96331294-fb65-457b-a3bd-5e41f1818044 7 24 Processed 8742 +8746 8d55a4f8-0550-4770-aac2-c7963bfa1176 genebuild_compute ENS01 2024-04-24 10:18:13 From 2bc8874e-6672-4293-89d6-0b837005177c 135 8 Processed 250 +8747 d92da251-954c-417b-8e89-03c677a60553 xrefs ENS01 2024-04-24 10:18:13 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 13 Processed 8746 +8748 819805e5-7aeb-437c-bb55-3918a7c94e48 protein_features ENS01 2024-04-24 10:18:13 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 14 Processed 8746 +8749 9c57cc1b-ee2b-4adf-968b-8b6cec556f95 alpha_fold ENS01 2024-04-24 10:18:13 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 15 Processed 8746 +8750 d4c1e848-8a50-4aae-95fe-5efb85833613 genebuild_files ENS01 2024-04-24 10:18:13 From 2bc8874e-6672-4293-89d6-0b837005177c 135 9 Processed 250 +8751 24fbb4e6-45a2-4bb9-a1b7-2b14fafc5135 blast ENS01 2024-04-24 10:18:13 From d4c1e848-8a50-4aae-95fe-5efb85833613 135 16 Processed 8750 +8752 32b8b190-ccb8-4d4e-8080-686fdb2bf853 ftp_dumps ENS01 2024-04-24 10:18:13 From d4c1e848-8a50-4aae-95fe-5efb85833613 135 17 Processed 8750 +8753 ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 genebuild_web ENS01 2024-04-24 10:18:13 From 2bc8874e-6672-4293-89d6-0b837005177c 135 11 Processed 250 +8754 b41d8022-6a58-4a2b-a0fb-6776a722b7f0 thoas_dumps ENS01 2024-04-24 10:18:13 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 18 Processed 8753 +8755 9d4373b8-4248-4afe-ab14-d6c14c1b19ea browser_files ENS01 2024-04-24 10:18:13 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 20 Processed 8753 +8756 
cc668846-b60c-4544-b151-84e77308595e checksums ENS01 2024-04-24 10:18:13 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 23 Processed 8753 +8757 7f200073-0c6e-4487-ac0b-f5cb160151f5 genebuild_web ENS01 2024-04-24 10:18:14 From 2bc8874e-6672-4293-89d6-0b837005177c 135 12 Processed 250 +8758 4bedae91-0f82-478f-bba4-23f8dcb83ef0 thoas_load ENS01 2024-04-24 10:18:14 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 19 Processed 8757 +8759 8389b690-fd8d-40bf-8bb7-05254261be6b genebuild_track ENS01 2024-04-24 10:18:14 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 21 Processed 8757 +8760 e6f310ea-3a25-4adb-aa01-f514fe4d4183 refget_load ENS01 2024-04-24 10:18:14 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 24 Processed 8757 +8761 05125758-03b0-43c3-b4eb-973f05293e42 genebuild_compute ENS01 2024-04-24 10:18:14 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 8 Processed 892 +8762 055d833f-a5e3-4dc2-bd19-0827d05a576c xrefs ENS01 2024-04-24 10:18:14 From 05125758-03b0-43c3-b4eb-973f05293e42 6 13 Processed 8761 +8763 966759bd-d77b-4f97-8502-307ba251adc8 protein_features ENS01 2024-04-24 10:18:14 From 05125758-03b0-43c3-b4eb-973f05293e42 6 14 Processed 8761 +8764 154cbdc9-e1de-4d40-9e99-21a6d18cacaf alpha_fold ENS01 2024-04-24 10:18:14 From 05125758-03b0-43c3-b4eb-973f05293e42 6 15 Processed 8761 +8765 9ab20e16-0d40-4145-8ad5-32e498b4cff4 genebuild_files ENS01 2024-04-24 10:18:14 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 9 Processed 892 +8766 0a975f98-3a5d-4270-9770-73cf4c48107b blast ENS01 2024-04-24 10:18:14 From 9ab20e16-0d40-4145-8ad5-32e498b4cff4 6 16 Processed 8765 +8767 4823d7a3-b2c8-4220-8652-20436a20d9ca ftp_dumps ENS01 2024-04-24 10:18:14 From 9ab20e16-0d40-4145-8ad5-32e498b4cff4 6 17 Processed 8765 +8768 a36bfaba-8751-403c-9024-ac00809cb748 genebuild_web ENS01 2024-04-24 10:18:14 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 11 Processed 892 +8769 d781cd94-54f7-403a-8a03-1114db2ccfe6 thoas_dumps ENS01 2024-04-24 10:18:14 From a36bfaba-8751-403c-9024-ac00809cb748 6 18 Processed 
8768 +8770 79d32d79-6346-4453-83d1-517ed275840b browser_files ENS01 2024-04-24 10:18:14 From a36bfaba-8751-403c-9024-ac00809cb748 6 20 Processed 8768 +8771 cd5a8672-9177-4e08-8eb2-8a770ee58ce7 checksums ENS01 2024-04-24 10:18:14 From a36bfaba-8751-403c-9024-ac00809cb748 6 23 Processed 8768 +8772 f6561cb1-4cae-47e7-ac63-ad2151f4b927 genebuild_web ENS01 2024-04-24 10:18:14 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 12 Processed 892 +8773 161453c1-3e3c-423e-a4f4-0f048d7c134c thoas_load ENS01 2024-04-24 10:18:14 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 19 Processed 8772 +8774 7f771283-0afa-4703-b534-3844646bc8e1 genebuild_track ENS01 2024-04-24 10:18:14 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 21 Processed 8772 +8775 00f2c284-5eea-43c0-a225-6bcc319a0b7f refget_load ENS01 2024-04-24 10:18:14 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 24 Processed 8772 +8776 da20e2b5-1809-494e-893f-7fb90e8032a1 genebuild_compute ENS01 2024-04-24 10:18:14 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 8 Processed 1006 +8777 8ec9f005-91d7-4015-be09-7b61b6d62c54 xrefs ENS01 2024-04-24 10:18:14 From da20e2b5-1809-494e-893f-7fb90e8032a1 107 13 Processed 8776 +8778 fdd6e615-8ac7-41fc-b8b2-aff7aeb9c99a protein_features ENS01 2024-04-24 10:18:14 From da20e2b5-1809-494e-893f-7fb90e8032a1 107 14 Processed 8776 +8779 f6d9a2a5-d744-4a90-a9b4-8656108bf921 alpha_fold ENS01 2024-04-24 10:18:14 From da20e2b5-1809-494e-893f-7fb90e8032a1 107 15 Processed 8776 +8780 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd genebuild_files ENS01 2024-04-24 10:18:14 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 9 Processed 1006 +8781 656bdb4a-c2f0-4ff1-93a8-6a780ba47e26 blast ENS01 2024-04-24 10:18:14 From 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd 107 16 Processed 8780 +8782 503fe667-0304-45db-ad36-860b9967290e ftp_dumps ENS01 2024-04-24 10:18:14 From 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd 107 17 Processed 8780 +8783 472c2bcc-3de5-446b-8b05-e33c3975acdb genebuild_web ENS01 2024-04-24 10:18:14 From 
66db32ae-974f-480c-a60b-63cc49d00f68 107 11 Processed 1006 +8784 83f43633-77ce-4164-8ec4-655707a4029d thoas_dumps ENS01 2024-04-24 10:18:14 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 18 Processed 8783 +8785 722acf20-f184-4ac4-b9ad-947de51b051e browser_files ENS01 2024-04-24 10:18:14 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 20 Processed 8783 +8786 8e15dcb4-cbd2-4971-a155-8d5956a38c41 checksums ENS01 2024-04-24 10:18:14 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 23 Processed 8783 +8787 aa4b8122-4480-4595-b2bf-c8c8f51537ce genebuild_web ENS01 2024-04-24 10:18:14 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 12 Processed 1006 +8788 4e7f51fc-b5f6-4aa3-ab31-b22a23d080b2 thoas_load ENS01 2024-04-24 10:18:14 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 19 Processed 8787 +8789 9770c787-1b91-4e65-bf76-5dc0d1c5c75f genebuild_track ENS01 2024-04-24 10:18:14 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 21 Processed 8787 +8790 0f140a16-de49-4566-a41d-8e1ccbc8f5d0 refget_load ENS01 2024-04-24 10:18:14 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 24 Processed 8787 +8791 e0a75f02-6ac6-4dfa-8196-50cb4803a9b8 evidence 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 4 Processed 1464 +8792 04892d42-1689-4e76-a158-717f1c773a3d short_variant 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 5 Processed 1464 +8793 4b9774b2-eabd-4981-b098-521b5b8a13a0 browser_files 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 31 Processed 1464 +8794 f8c7383b-aaac-41cf-9ac8-dce5f99b5338 variation_ftp 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 29 Processed 1464 +8795 3aeb9633-f9a6-4693-b51c-875935bb3e16 variation_track 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 32 Processed 1464 +8796 628143db-4adb-4086-9168-1f7e875bbafe evidence 1.0 2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 4 Processed 1470 +8797 0c150044-3236-4cbd-ba06-19e19d10000a short_variant 1.0 
2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 5 Processed 1470 +8798 e488f2c0-8926-4cd4-a3f4-5085885273ba browser_files 1.0 2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 31 Processed 1470 +8799 0b1d6792-050b-461b-b6e7-8013f03caace variation_ftp 1.0 2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 29 Processed 1470 +8800 b55eedc6-c8e9-46f9-8f3a-5487b590d563 variation_track 1.0 2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 32 Processed 1470 +8801 4f615d55-8b10-4004-88c0-169e1016032c evidence 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 4 Processed 1491 +8802 9bdb03db-aed4-41af-be5c-7912d5cf82ad short_variant 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 5 Processed 1491 +8803 1bc869fb-7586-4394-8da3-40502ce06f28 browser_files 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 31 Processed 1491 +8804 0d05a7a3-6a4d-4f61-bcfe-76728865fe0a variation_ftp 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 29 Processed 1491 +8805 3f87e8e7-21f6-441c-83a4-a5c7204c5320 variation_track 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 32 Processed 1491 +8806 206879dd-6d27-451d-b5aa-8330696afc1a evidence 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 4 Processed 1494 +8807 1f82e0d6-2232-4dc7-9dba-c1c62b42c24f short_variant 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 5 Processed 1494 +8808 4c9ad5d4-3a35-40b7-ba30-a09b958a205b browser_files 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 31 Processed 1494 +8809 1c37a8bf-43d6-4fc8-98d8-b4e0d3a31931 variation_ftp 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 29 Processed 1494 +8810 460598d9-1266-4652-8ef8-536e8c1391fe variation_track 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 32 Processed 1494 +8811 
6c28c1ae-af07-4199-9da9-b48ae0fcb7b7 homology_compute 1.0 2024-04-24 10:18:14 From 35f56606-9186-432f-b033-6e7204708f3b 294 25 Processed 2319 +8812 4d9b87ea-6313-4aee-ad8b-ab2a2813e22a homology_load 1.0 2024-04-24 10:18:14 From 35f56606-9186-432f-b033-6e7204708f3b 294 26 Processed 2319 +8813 9e3a4f8a-95f2-415c-88ec-5da6eb3e7c10 homology_ftp 1.0 2024-04-24 10:18:14 From 35f56606-9186-432f-b033-6e7204708f3b 294 27 Processed 2319 +8814 cb8f93b0-b903-49c8-ad48-1cc4f4ceee6a homology_compute 1.0 2024-04-24 10:18:14 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 25 Processed 6593 +8815 9045fc64-a2f2-4cfa-b10e-8b55e6e631e2 homology_load 1.0 2024-04-24 10:18:14 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 26 Processed 6593 +8816 b9c88135-824b-425b-ab1b-156cd58a0bde homology_ftp 1.0 2024-04-24 10:18:15 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 27 Processed 6593 +8817 fd821999-6a13-407c-b6ae-bda323fc1795 homology_compute 1.0 2024-04-24 10:18:15 From 60fae37a-afe2-4bb1-9da0-321911dd2856 266 25 Processed 6623 +8818 f9634579-f88a-4892-b2c1-1762da95b69e homology_load 1.0 2024-04-24 10:18:15 From 60fae37a-afe2-4bb1-9da0-321911dd2856 266 26 Processed 6623 +8819 5bce46a4-6eea-4b4d-8843-681ed932e251 homology_ftp 1.0 2024-04-24 10:18:15 From 60fae37a-afe2-4bb1-9da0-321911dd2856 266 27 Processed 6623 +8820 1e3c7c73-8748-4b80-afc5-37a4045e0f71 homology_compute 1.0 2024-04-24 10:18:15 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 25 Processed 6699 +8821 0c048acb-d4e2-441f-8d8c-86b83e9bf23d homology_load 1.0 2024-04-24 10:18:15 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 26 Processed 6699 +8822 021b89f1-0f7f-4478-97aa-0accd0a7606e homology_ftp 1.0 2024-04-24 10:18:15 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 27 Processed 6699 +8823 c929d7cd-aa29-4605-863d-60072b9eccc8 homology_compute 1.0 2024-04-24 10:18:15 From f3abf167-6a8f-45cc-b753-22a955123758 387 25 Processed 6896 +8824 ecdd3f2d-74ba-4a2b-96e9-c43bce42e266 homology_load 1.0 2024-04-24 10:18:15 From 
f3abf167-6a8f-45cc-b753-22a955123758 387 26 Processed 6896 +8825 f7834e60-f18a-4ec6-b8c3-def92135f691 homology_ftp 1.0 2024-04-24 10:18:15 From f3abf167-6a8f-45cc-b753-22a955123758 387 27 Processed 6896 +8826 2f3e7f30-2c95-4bcf-a02f-3f8819ab6562 homology_compute 1.0 2024-04-24 10:18:15 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 25 Processed 7069 +8827 afdc661a-4e30-411f-b453-14ecf1973672 homology_load 1.0 2024-04-24 10:18:15 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 26 Processed 7069 +8828 f9fa5df3-e813-4667-aea2-2df4d67ffdb4 homology_ftp 1.0 2024-04-24 10:18:15 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 27 Processed 7069 +8829 85b782ae-d1a6-4bee-8a48-3dae3c9da7aa homology_compute 1.0 2024-04-24 10:18:15 From d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 25 Processed 7177 +8830 3b89fcc0-1809-4de6-ae4e-e82bf34c26bf homology_load 1.0 2024-04-24 10:18:15 From d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 26 Processed 7177 +8831 e1a9a2ed-318a-4e57-9136-ecb33082d71c homology_ftp 1.0 2024-04-24 10:18:15 From d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 27 Processed 7177 +8832 003c2932-ba1d-47b8-8f32-88b27bd79d87 homology_compute 1.0 2024-04-24 10:18:15 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 25 Processed 7320 +8833 9fa37967-8ace-4f6d-925b-dce75a70ab79 homology_load 1.0 2024-04-24 10:18:15 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 26 Processed 7320 +8834 94360425-5f48-4bc9-8863-2011b3e8115d homology_ftp 1.0 2024-04-24 10:18:15 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 27 Processed 7320 +8835 9c874e53-bf5c-4c49-8a31-123c1025a7a6 homology_compute 1.0 2024-04-24 10:18:15 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 25 Processed 7535 +8836 ff6239d9-bd90-49f9-b3a5-0cee348267b7 homology_load 1.0 2024-04-24 10:18:15 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 26 Processed 7535 +8837 b7b5b776-30f0-469f-bd19-707ba2fa8f3c homology_ftp 1.0 2024-04-24 10:18:15 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 27 Processed 7535 +8838 18783f74-ffd9-4e46-90bd-f18d7ca00896 
homology_compute 1.0 2024-04-24 10:18:15 From 254a68c7-f512-446d-a958-983a2713daf2 359 25 Processed 7603 +8839 e2ea6dca-fd18-4cc5-8120-d724bea5a5bd homology_load 1.0 2024-04-24 10:18:15 From 254a68c7-f512-446d-a958-983a2713daf2 359 26 Processed 7603 +8840 86288d80-6d0e-4da8-9c3c-a2a78b740f63 homology_ftp 1.0 2024-04-24 10:18:15 From 254a68c7-f512-446d-a958-983a2713daf2 359 27 Processed 7603 +8841 c40f1ff1-2da6-4569-9c95-dae534bdb59c homology_compute 1.0 2024-04-24 10:18:15 From 9f2a7c92-e04a-443f-a991-1481a9466456 313 25 Processed 7785 +8842 9e45ec23-6f77-40b3-b487-717981b6789a homology_load 1.0 2024-04-24 10:18:15 From 9f2a7c92-e04a-443f-a991-1481a9466456 313 26 Processed 7785 +8843 ea7e6915-8d77-4eee-bb3c-35e93a41e7e9 homology_ftp 1.0 2024-04-24 10:18:15 From 9f2a7c92-e04a-443f-a991-1481a9466456 313 27 Processed 7785 +8844 3c7d8319-a57c-447b-b47f-5a1873a0b018 homology_compute 1.0 2024-04-24 10:18:15 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 25 Processed 7820 +8845 b9dac26c-ecc2-4ec4-a8ac-d240cd0b748e homology_load 1.0 2024-04-24 10:18:15 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 26 Processed 7820 +8846 c2772f23-1c2f-4e55-8453-b6985475a629 homology_ftp 1.0 2024-04-24 10:18:15 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 27 Processed 7820 +8847 7e892555-9d54-479e-9d2c-1fab7e45c03f homology_compute 1.0 2024-04-24 10:18:15 From d78259af-f491-42de-9cbf-de744b09efee 332 25 Processed 7847 +8848 3e2a34c6-cd2b-4e1f-bac4-9acf67254bf8 homology_load 1.0 2024-04-24 10:18:15 From d78259af-f491-42de-9cbf-de744b09efee 332 26 Processed 7847 +8849 4b97ed95-560c-4a4c-95c1-44b654ad0fa0 homology_ftp 1.0 2024-04-24 10:18:15 From d78259af-f491-42de-9cbf-de744b09efee 332 27 Processed 7847 +8850 10e0efaa-e444-4e8d-927e-a761aa30969f homology_compute 1.0 2024-04-24 10:18:15 From b6472939-9e49-4d46-b93e-304910acabf3 4352 25 Processed 8392 +8851 369f5f6b-763f-44ea-8f13-862c06461346 homology_load 1.0 2024-04-24 10:18:15 From b6472939-9e49-4d46-b93e-304910acabf3 4352 26 Processed 8392 
+8852 f04c6b1b-4458-4d28-aa47-b43ea12b08c9 homology_ftp 1.0 2024-04-24 10:18:15 From b6472939-9e49-4d46-b93e-304910acabf3 4352 27 Processed 8392 +8853 9ca4293d-6710-4903-9fc1-8417265ce27d genebuild_compute ENS01 2024-04-24 10:18:15 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 8 Processed 178 +8854 570f2d24-a2c9-4bb2-895f-8653c0def936 xrefs ENS01 2024-04-24 10:18:15 From 9ca4293d-6710-4903-9fc1-8417265ce27d 97 13 Processed 8853 +8855 0f80b03b-1145-4cdd-bf93-9df96b65f160 protein_features ENS01 2024-04-24 10:18:15 From 9ca4293d-6710-4903-9fc1-8417265ce27d 97 14 Processed 8853 +8856 11584801-c419-4acf-bb1f-119303afc0d4 alpha_fold ENS01 2024-04-24 10:18:15 From 9ca4293d-6710-4903-9fc1-8417265ce27d 97 15 Processed 8853 +8857 8ab46307-5632-4b68-9815-83997bf53b94 genebuild_files ENS01 2024-04-24 10:18:15 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 9 Processed 178 +8858 1848bff4-5285-4c3a-abcc-62869d41dd87 blast ENS01 2024-04-24 10:18:15 From 8ab46307-5632-4b68-9815-83997bf53b94 97 16 Processed 8857 +8859 1861572a-1bde-4720-bbfa-951b9d8a4456 ftp_dumps ENS01 2024-04-24 10:18:15 From 8ab46307-5632-4b68-9815-83997bf53b94 97 17 Processed 8857 +8860 2109cb44-362b-4571-94da-67859d194824 genebuild_web ENS01 2024-04-24 10:18:15 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 11 Processed 178 +8861 2f09c74f-c2cc-4fea-8d86-6e7461b3366d thoas_dumps ENS01 2024-04-24 10:18:15 From 2109cb44-362b-4571-94da-67859d194824 97 18 Processed 8860 +8862 fdca5cd5-9a2f-4c94-bc52-fcdda4070e4e browser_files ENS01 2024-04-24 10:18:15 From 2109cb44-362b-4571-94da-67859d194824 97 20 Processed 8860 +8863 a2a1216d-2cad-41d8-b115-a6711989abd5 checksums ENS01 2024-04-24 10:18:15 From 2109cb44-362b-4571-94da-67859d194824 97 23 Processed 8860 +8864 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 genebuild_compute ENS01 2024-04-24 10:18:16 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 8 Processed 896 +8865 f2e96294-16ce-4575-af45-f9a2c46383d7 xrefs ENS01 2024-04-24 10:18:16 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 13 
Processed 8864 +8866 165a40b7-2eee-4ba4-b4dc-8b6ad2402004 protein_features ENS01 2024-04-24 10:18:16 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 14 Processed 8864 +8867 d3aee3e0-6b4b-43c9-8c52-19a18f91f824 alpha_fold ENS01 2024-04-24 10:18:16 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 15 Processed 8864 +8868 b66be831-e87b-4bd5-ba89-fc283cc50193 genebuild_files ENS01 2024-04-24 10:18:16 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 9 Processed 896 +8869 79505983-6bf5-4828-8240-4d51a36a3171 blast ENS01 2024-04-24 10:18:16 From b66be831-e87b-4bd5-ba89-fc283cc50193 9 16 Processed 8868 +8870 2b747467-a74c-488d-a900-3f6385c1f26a ftp_dumps ENS01 2024-04-24 10:18:16 From b66be831-e87b-4bd5-ba89-fc283cc50193 9 17 Processed 8868 +8871 b42fdfd2-7bca-4cd1-bee4-46287457ee93 genebuild_web ENS01 2024-04-24 10:18:16 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 11 Processed 896 +8872 1568bc5d-9fba-42df-87b3-52f77a8552b7 thoas_dumps ENS01 2024-04-24 10:18:16 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 18 Processed 8871 +8873 cff11c40-12e8-469d-945f-c1c6ffb852f5 browser_files ENS01 2024-04-24 10:18:16 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 20 Processed 8871 +8874 11e13c3e-5773-4f30-86c1-6c251f9c4c70 checksums ENS01 2024-04-24 10:18:16 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 23 Processed 8871 +8875 e79f65ba-08a5-4aca-b3b9-08ff7c36ba70 evidence 1.0 2024-04-24 10:18:16 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 4 Processed 1496 +8876 7d15f5de-2e98-44b0-ba74-9c70bfd450c6 short_variant 1.0 2024-04-24 10:18:16 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 5 Processed 1496 +8877 ea922b02-b8fe-4f7c-ac4d-a133acc5f532 browser_files 1.0 2024-04-24 10:18:16 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 31 Processed 1496 +8878 7a89be81-4103-4eb8-98b2-23e96e0c4f76 genebuild_compute EXT01 2024-04-24 10:18:16 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 8 Released 2 +8879 20c06eab-391d-4b06-943c-0754f0fef146 xrefs EXT01 2024-04-24 10:18:16 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 13 
Released 8878 +8880 a29813e0-c950-40fc-b970-a360a10a15b6 protein_features EXT01 2024-04-24 10:18:16 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 14 Released 8878 +8881 5c0b59d2-faf4-4297-b10d-c304e1f55998 alpha_fold EXT01 2024-04-24 10:18:16 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 15 Released 8878 +8882 8d33dbd0-93d9-4279-bdfe-21f756afc898 genebuild_files EXT01 2024-04-24 10:18:16 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 9 Released 2 +8883 7e0ec09a-0ed9-461f-abb4-bb8de9f9b842 blast EXT01 2024-04-24 10:18:16 From 8d33dbd0-93d9-4279-bdfe-21f756afc898 1 16 Released 8882 +8884 551593dc-42ad-45ec-8311-c052330feaac ftp_dumps EXT01 2024-04-24 10:18:16 From 8d33dbd0-93d9-4279-bdfe-21f756afc898 1 17 Released 8882 +8885 7b5a6b07-d345-479f-95a0-c9a6712eb747 genebuild_web EXT01 2024-04-24 10:18:16 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 11 Released 2 +8886 711ea653-09ba-47da-b6ff-585c25548546 thoas_dumps EXT01 2024-04-24 10:18:16 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 18 Released 8885 +8887 1f7df867-7a26-4b59-98cb-866e63a215f9 browser_files EXT01 2024-04-24 10:18:16 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 20 Released 8885 +8888 34d43bd3-3061-43ac-b0ab-7eb4d7edd3b3 checksums EXT01 2024-04-24 10:18:16 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 23 Released 8885 +8889 fd8f0c4f-59c6-4d40-8dc9-1784f312b935 genebuild_web EXT01 2024-04-24 10:18:16 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 12 Released 2 +8890 2b3158ad-aedc-464d-bad1-7dc448a1623a thoas_load EXT01 2024-04-24 10:18:16 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 19 Released 8889 +8891 62b23f57-bc61-4ade-aa1d-77ecf7f8b18c genebuild_track EXT01 2024-04-24 10:18:16 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 21 Released 8889 +8892 620df1db-d908-4406-9f1d-e97b11c798c7 refget_load EXT01 2024-04-24 10:18:16 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 24 Released 8889 +8893 35485780-095a-44ad-a85b-ed37aff1f5ac genebuild_compute EXT01 2024-04-24 10:18:16 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 8 Released 
24 +8894 7760299d-bc48-4424-82ba-d069153212a1 xrefs EXT01 2024-04-24 10:18:16 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 13 Released 8893 +8895 95b0c4a8-15fe-4d7e-b07a-3ebc08a95ddf protein_features EXT01 2024-04-24 10:18:16 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 14 Released 8893 +8896 256fe2d6-3e6f-4c89-9d46-1616de1bac53 alpha_fold EXT01 2024-04-24 10:18:16 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 15 Released 8893 +8897 273822b7-89dd-4eef-a0d7-c6aae2322939 genebuild_files EXT01 2024-04-24 10:18:16 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 9 Released 24 +8898 c090e67c-df71-401f-a90c-1fca8aee34fe blast EXT01 2024-04-24 10:18:16 From 273822b7-89dd-4eef-a0d7-c6aae2322939 14 16 Released 8897 +8899 c51e1970-4e48-41cc-8955-be9172cf5f23 ftp_dumps EXT01 2024-04-24 10:18:16 From 273822b7-89dd-4eef-a0d7-c6aae2322939 14 17 Released 8897 +8900 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 genebuild_web EXT01 2024-04-24 10:18:16 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 11 Released 24 +8901 d5e0ebee-7117-494d-bc1b-6d2c70d9491c thoas_dumps EXT01 2024-04-24 10:18:16 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 18 Released 8900 +8902 6f8f8f0b-e755-45cc-97eb-d9c182e873b0 browser_files EXT01 2024-04-24 10:18:16 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 20 Released 8900 +8903 ff6ccd65-e644-42e9-8f38-04a8f253bef9 checksums EXT01 2024-04-24 10:18:16 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 23 Released 8900 +8904 8945cfec-17e5-48af-83f4-79907740fddd genebuild_web EXT01 2024-04-24 10:18:16 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 12 Released 24 +8905 2bdb1dcf-b45b-4d91-82ba-d1c9137ae7b9 thoas_load EXT01 2024-04-24 10:18:16 From 8945cfec-17e5-48af-83f4-79907740fddd 14 19 Released 8904 +8906 a88f003b-d5bf-479f-aeeb-4696a3de728a genebuild_track EXT01 2024-04-24 10:18:16 From 8945cfec-17e5-48af-83f4-79907740fddd 14 21 Released 8904 +8907 fce24263-d668-4daa-9eac-27abdebf3a90 refget_load EXT01 2024-04-24 10:18:16 From 8945cfec-17e5-48af-83f4-79907740fddd 14 24 Released 8904 
+8908 07f1ea6a-bc29-4426-ab28-db9e8df67135 genebuild_compute GENCODE19 2024-04-24 10:18:16 From e456d1c2-eea2-40f1-83ee-31912905b695 40 8 Released 62 +8909 50c5df76-ad83-4cdd-822b-59a0a5a5caaa xrefs GENCODE19 2024-04-24 10:18:16 From 07f1ea6a-bc29-4426-ab28-db9e8df67135 40 13 Released 8908 +8910 f45160df-1af8-4bb4-b52b-099c9f5ce005 protein_features GENCODE19 2024-04-24 10:18:16 From 07f1ea6a-bc29-4426-ab28-db9e8df67135 40 14 Released 8908 +8911 9bb457f3-a4c6-43ea-a5d8-df8193e33e2b alpha_fold GENCODE19 2024-04-24 10:18:16 From 07f1ea6a-bc29-4426-ab28-db9e8df67135 40 15 Released 8908 +8912 1cd2a36c-9459-4aae-add4-594bdf9570ae genebuild_files GENCODE19 2024-04-24 10:18:16 From e456d1c2-eea2-40f1-83ee-31912905b695 40 9 Released 62 +8913 f3ac13c6-0943-45e5-b553-6e6ecd5febb0 blast GENCODE19 2024-04-24 10:18:16 From 1cd2a36c-9459-4aae-add4-594bdf9570ae 40 16 Released 8912 +8914 f817d89d-e6c7-474f-b585-a816c9a19926 ftp_dumps GENCODE19 2024-04-24 10:18:16 From 1cd2a36c-9459-4aae-add4-594bdf9570ae 40 17 Released 8912 +8915 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 genebuild_web GENCODE19 2024-04-24 10:18:16 From e456d1c2-eea2-40f1-83ee-31912905b695 40 11 Released 62 +8916 e055d56a-3878-4ccb-ac8b-56748d103fbd thoas_dumps GENCODE19 2024-04-24 10:18:16 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 18 Released 8915 +8917 9c4831c4-8ee3-4058-8325-54a1d642e0a5 browser_files GENCODE19 2024-04-24 10:18:16 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 20 Released 8915 +8918 8aa89c54-db39-4f69-9c23-78a2f7077548 checksums GENCODE19 2024-04-24 10:18:16 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 23 Released 8915 +8919 c0993f4c-eda0-40e3-807d-ad7ea361e285 genebuild_web GENCODE19 2024-04-24 10:18:16 From e456d1c2-eea2-40f1-83ee-31912905b695 40 12 Released 62 +8920 1bf39fbc-6863-4c23-8960-975a027556e6 thoas_load GENCODE19 2024-04-24 10:18:16 From c0993f4c-eda0-40e3-807d-ad7ea361e285 40 19 Released 8919 +8921 3c1f9d35-8013-40ff-98b5-b62c764f284f genebuild_track GENCODE19 2024-04-24 10:18:16 From 
c0993f4c-eda0-40e3-807d-ad7ea361e285 40 21 Released 8919 +8922 36d517a8-f972-4350-a44c-88d04d44286f refget_load GENCODE19 2024-04-24 10:18:16 From c0993f4c-eda0-40e3-807d-ad7ea361e285 40 24 Released 8919 +8923 9687952e-9dc8-4240-aece-a968dadbe909 genebuild_compute EXT01 2024-04-24 10:18:17 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 8 Released 148 +8924 3ed69d61-78ad-4cd0-9244-8337c6896de7 xrefs EXT01 2024-04-24 10:18:17 From 9687952e-9dc8-4240-aece-a968dadbe909 79 13 Released 8923 +8925 90cbc64d-d88f-4bdf-ac6c-99e0837f4253 protein_features EXT01 2024-04-24 10:18:17 From 9687952e-9dc8-4240-aece-a968dadbe909 79 14 Released 8923 +8926 a70bc6ec-1678-487e-89a9-c39b338bc624 alpha_fold EXT01 2024-04-24 10:18:17 From 9687952e-9dc8-4240-aece-a968dadbe909 79 15 Released 8923 +8927 704e4912-c270-4647-99bb-e8789d092949 genebuild_files EXT01 2024-04-24 10:18:17 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 9 Released 148 +8928 b90ce8e9-0ddd-4591-b118-ecce2ab04b37 blast EXT01 2024-04-24 10:18:17 From 704e4912-c270-4647-99bb-e8789d092949 79 16 Released 8927 +8929 935a9cee-4712-47ea-8f56-5aacf09b3883 ftp_dumps EXT01 2024-04-24 10:18:17 From 704e4912-c270-4647-99bb-e8789d092949 79 17 Released 8927 +8930 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 genebuild_web EXT01 2024-04-24 10:18:17 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 11 Released 148 +8931 b4f77b8b-00e0-4977-9c75-5c2d65a07f0d thoas_dumps EXT01 2024-04-24 10:18:17 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 18 Released 8930 +8932 c82b305f-f783-40ab-af1a-8c2375e8816a browser_files EXT01 2024-04-24 10:18:17 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 20 Released 8930 +8933 fcd75ab2-a057-4d7d-ad37-b7604a7d0764 checksums EXT01 2024-04-24 10:18:17 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 23 Released 8930 +8934 a81f1f36-c2e4-498e-ba53-8c8a28759a4c genebuild_web EXT01 2024-04-24 10:18:17 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 12 Released 148 +8935 56fcabc6-a26f-4007-aef7-28440b60a990 thoas_load EXT01 2024-04-24 10:18:17 
From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 19 Released 8934 +8936 f9ef4142-f4c9-4def-84af-c9480934d408 genebuild_track EXT01 2024-04-24 10:18:17 From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 21 Released 8934 +8937 329e59f5-ff09-4e8f-8398-c5bb37d25a16 refget_load EXT01 2024-04-24 10:18:17 From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 24 Released 8934 +8938 d732f47d-4783-4cf3-80ee-566347f27fe5 genebuild_compute GENCODE44 2024-04-24 10:18:17 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 8 Released 172 +8939 f0e56fd2-53a7-4a5d-8c6e-a0e425659e2d xrefs GENCODE44 2024-04-24 10:18:17 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 13 Released 8938 +8940 9e50213a-a3dc-4d86-bff2-2c607ea97be6 protein_features GENCODE44 2024-04-24 10:18:17 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 14 Released 8938 +8941 c683f337-fe1a-4080-8fa6-b2f5921c82f5 alpha_fold GENCODE44 2024-04-24 10:18:17 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 15 Released 8938 +8942 a26a6832-2081-4f10-bdec-9034f9682f88 genebuild_files GENCODE44 2024-04-24 10:18:17 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 9 Released 172 +8943 1bbc28e3-55b6-42aa-a806-231f4d645d79 blast GENCODE44 2024-04-24 10:18:17 From a26a6832-2081-4f10-bdec-9034f9682f88 92 16 Released 8942 +8944 6353961a-be42-43f6-be0e-98dff9a0e6fa ftp_dumps GENCODE44 2024-04-24 10:18:17 From a26a6832-2081-4f10-bdec-9034f9682f88 92 17 Released 8942 +8945 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 genebuild_web GENCODE44 2024-04-24 10:18:17 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 11 Released 172 +8946 4f8b970e-054f-48a2-82ee-f638d42cb88c thoas_dumps GENCODE44 2024-04-24 10:18:17 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 18 Released 8945 +8947 893ab5b4-cf72-4ac8-93a9-6b263ef24fec browser_files GENCODE44 2024-04-24 10:18:17 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 20 Released 8945 +8948 0d2e1d80-ec5d-4c83-a777-d0e12dd3a20d checksums GENCODE44 2024-04-24 10:18:17 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 23 Released 8945 +8949 
d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 genebuild_web GENCODE44 2024-04-24 10:18:17 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 12 Released 172 +8950 7cc832cd-2a22-4326-95ec-3b440ad218d0 thoas_load GENCODE44 2024-04-24 10:18:17 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 19 Released 8949 +8951 743e6c8d-5fe5-4417-9673-d807b4b494e8 genebuild_track GENCODE44 2024-04-24 10:18:17 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 21 Released 8949 +8952 775b793f-124c-4b97-8734-cf38aa2e66d4 refget_load GENCODE44 2024-04-24 10:18:17 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 24 Released 8949 +8953 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c genebuild_compute EXT01 2024-04-24 10:18:17 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 8 Released 402 +8954 7e303177-8107-4467-affd-60fcb3cb9fe9 xrefs EXT01 2024-04-24 10:18:17 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 13 Released 8953 +8955 5ea5d208-db3d-409a-ad3b-90fc25357975 protein_features EXT01 2024-04-24 10:18:17 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 14 Released 8953 +8956 bde17d27-1505-4d55-81e1-589b2c160758 alpha_fold EXT01 2024-04-24 10:18:17 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 15 Released 8953 +8957 a312a91d-c738-44d2-9117-3289689074bf genebuild_files EXT01 2024-04-24 10:18:17 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 9 Released 402 +8958 cabdf704-d7a7-43c6-91b1-ef13643fe743 blast EXT01 2024-04-24 10:18:17 From a312a91d-c738-44d2-9117-3289689074bf 214 16 Released 8957 +8959 35242e52-ac0a-4232-a04e-602712188981 ftp_dumps EXT01 2024-04-24 10:18:17 From a312a91d-c738-44d2-9117-3289689074bf 214 17 Released 8957 +8960 c679cde3-49b9-4b2e-a415-f5e41a2584ad genebuild_web EXT01 2024-04-24 10:18:17 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 11 Released 402 +8961 15392806-6489-4ca8-8dfe-4c068b8cbfc5 thoas_dumps EXT01 2024-04-24 10:18:17 From c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 18 Released 8960 +8962 30b00998-cf48-4e19-87a5-0d77a7d8eab8 browser_files EXT01 2024-04-24 10:18:17 From 
c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 20 Released 8960 +8963 7405d0be-af0e-4a25-909d-79804221ab66 checksums EXT01 2024-04-24 10:18:17 From c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 23 Released 8960 +8964 63902fec-bfaa-49bd-90c7-38910ad11921 genebuild_web EXT01 2024-04-24 10:18:17 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 12 Released 402 +8965 84157ee7-4328-4489-a235-062c76cc6bc9 thoas_load EXT01 2024-04-24 10:18:17 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 19 Released 8964 +8966 b1b03468-d47e-4033-b164-e24a77c1419a genebuild_track EXT01 2024-04-24 10:18:17 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 21 Released 8964 +8967 6f136ae5-07b4-457c-9452-b7272490214a refget_load EXT01 2024-04-24 10:18:17 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 24 Released 8964 +8968 0c0887c2-839a-4df9-b9dc-85c7f1605e16 genebuild_compute EXT01 2024-04-24 10:18:17 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 8 Released 406 +8969 8543b0f0-0798-4af5-9ee2-68ab081ce2f9 xrefs EXT01 2024-04-24 10:18:17 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 13 Released 8968 +8970 915f67e9-46aa-419c-b1f1-3e3fe8d37c9f protein_features EXT01 2024-04-24 10:18:17 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 14 Released 8968 +8971 9429df49-33dd-4c4b-bb1e-a086f9be1311 alpha_fold EXT01 2024-04-24 10:18:17 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 15 Released 8968 +8972 b3979b48-55ad-42a3-9f03-b0e6eb1b8408 genebuild_files EXT01 2024-04-24 10:18:17 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 9 Released 406 +8973 4d258abd-6847-486c-9196-bb4da2a13cd0 blast EXT01 2024-04-24 10:18:17 From b3979b48-55ad-42a3-9f03-b0e6eb1b8408 217 16 Released 8972 +8974 fe818538-d329-4e46-b311-71a13f546eb7 ftp_dumps EXT01 2024-04-24 10:18:17 From b3979b48-55ad-42a3-9f03-b0e6eb1b8408 217 17 Released 8972 +8975 537ac1ca-5839-4d21-8f66-815253a29de8 genebuild_web EXT01 2024-04-24 10:18:17 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 11 Released 406 +8976 e6c747ba-6515-447c-9dec-223a5e7b5ab2 thoas_dumps EXT01 
2024-04-24 10:18:17 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 18 Released 8975 +8977 2cd06f2e-8ebb-400a-9cd2-6a313dd67b79 browser_files EXT01 2024-04-24 10:18:17 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 20 Released 8975 +8978 e3282d40-5aec-4970-924c-20fb943324fe checksums EXT01 2024-04-24 10:18:18 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 23 Released 8975 +8979 8d7811aa-63db-480f-b2cd-28fbd0e414e7 genebuild_web EXT01 2024-04-24 10:18:18 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 12 Released 406 +8980 bb86329a-3ff5-4aa1-bccb-fa10866c0400 thoas_load EXT01 2024-04-24 10:18:18 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 19 Released 8979 +8981 58a02fb2-387a-4d93-9798-b8a2ec2a990b genebuild_track EXT01 2024-04-24 10:18:18 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 21 Released 8979 +8982 a9e9630d-7e6c-4a03-8b76-3eea9d58ddd5 refget_load EXT01 2024-04-24 10:18:18 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 24 Released 8979 +8983 b847fdd0-205d-4010-a216-a150eb9dcf62 evidence 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 4 Released 1391 +8984 d6a11f1e-41fd-409b-a42c-6bb5eed4536d short_variant 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 5 Released 1391 +8985 8ba10f26-694b-4e8d-9888-aaa860581af7 variation_ftp 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 29 Released 1391 +8986 62bad9a6-2406-4a93-bc6d-1a3c871fce94 browser_files 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 31 Released 1391 +8987 48d069c5-f099-414d-bac1-b682f78a1fde variation_track 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 32 Released 1391 +8988 07a02fee-c19f-4c11-b35a-2e2b51d67378 evidence 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 4 Released 1392 +8989 cc377779-9dc4-4cfb-ba14-81531a7fb69b short_variant 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 5 Released 1392 +8990 d2261995-a526-4728-a6a8-568b17c217d3 
variation_ftp 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 29 Released 1392 +8991 97b81726-34e3-4dbc-976a-ac2e9fa64c17 browser_files 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 31 Released 1392 +8992 49d855db-8570-4a4d-9290-d2fc3b1ba02d variation_track 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 32 Released 1392 +8993 a487cac9-c58c-48d9-a0b3-2808346ce541 evidence 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 4 Released 1528 +8994 c5dec575-8720-46f3-ae5d-a86f22760b30 short_variant 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 5 Released 1528 +8995 05e09f88-0d11-47f8-8db0-92714ce6ba42 variation_ftp 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 29 Released 1528 +8996 96da2064-5735-4a91-bd1f-dbeff5548b50 browser_files 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 31 Released 1528 +8997 c50d02a2-16a0-44a2-be1a-32b63ab04deb variation_track 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 32 Released 1528 +8998 c1829f04-cccd-436d-9f0a-1d82ed117064 evidence 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 4 Released 1537 +8999 9bf0c2f7-2dc8-4f27-b578-a1cb277e1a63 short_variant 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 5 Released 1537 +9000 f3206998-32e5-465b-8b76-d21e3c24bb18 variation_ftp 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 29 Released 1537 +9001 dd7a4ab7-d890-488e-b014-b6e9eacf8a3a browser_files 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 31 Released 1537 +9002 263fda44-a84a-4879-912e-4de5e75be0ea variation_track 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 32 Released 1537 +9003 963a3baf-5f86-4372-b0ce-79726329ce59 evidence 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 4 Released 1544 +9004 
cdabf7a3-e5bf-4afa-ae41-80eeca2ec76d short_variant 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 5 Released 1544 +9005 c708b53e-6fbb-49d9-b9c3-a8a09c3b7f0d variation_ftp 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 29 Released 1544 +9006 029df488-7091-47ed-9db7-e1abf23cc429 browser_files 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 31 Released 1544 +9007 b4863deb-6e3f-4f56-9904-f64cb1783409 variation_track 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 32 Released 1544 +9008 9feb9d70-8966-49f4-a385-3777b66ca2a2 homology_compute 1.0 2024-04-24 10:18:18 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 25 Released 2291 +9009 da2808ff-be03-4b52-aeb8-5415e509a8b1 homology_load 1.0 2024-04-24 10:18:18 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 26 Released 2291 +9010 9aaf2c4c-f026-4c6c-8b65-c475e3be683a homology_ftp 1.0 2024-04-24 10:18:18 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 27 Released 2291 +9011 049cc616-bf6e-4f21-b3b3-6ba0672e79ac homology_compute 1.0 2024-04-24 10:18:18 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 25 Released 2348 +9012 0d4335ae-fcd5-4ac4-a1a7-d766c4dad6bc homology_load 1.0 2024-04-24 10:18:18 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 26 Released 2348 +9013 7642b849-ba92-4f47-960c-9dc07b2488c8 homology_ftp 1.0 2024-04-24 10:18:18 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 27 Released 2348 +9014 b2f53226-7a8f-41cc-bef3-f1dc6d7324ad homology_compute 1.0 2024-04-24 10:18:18 From caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 25 Released 2357 +9015 d174a9a1-39df-4e81-94bd-98db30505730 homology_load 1.0 2024-04-24 10:18:18 From caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 26 Released 2357 +9016 23f92c2d-83a6-43de-87c7-a83aad6406a5 homology_ftp 1.0 2024-04-24 10:18:18 From caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 27 Released 2357 +9017 41b03ede-f8f2-4c4c-b1f7-86d506798835 homology_compute 1.0 2024-04-24 10:18:18 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 
359 25 Released 2384 +9018 1e809aac-bfe5-4a89-9472-c2f183205f3b homology_load 1.0 2024-04-24 10:18:18 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 359 26 Released 2384 +9019 8c572ee1-140c-43f4-9c6f-287662793018 homology_ftp 1.0 2024-04-24 10:18:18 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 359 27 Released 2384 +9020 70452894-16f7-4b8c-a780-1dd869bec2fa homology_compute 1.0 2024-04-24 10:18:18 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 25 Released 2394 +9021 ba8bd474-977f-4c52-88b4-7cd99f288f2c homology_load 1.0 2024-04-24 10:18:18 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 26 Released 2394 +9022 8b148701-df21-4cdc-8123-66ba2a7a59e0 homology_ftp 1.0 2024-04-24 10:18:18 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 27 Released 2394 +9023 f1ba169e-f9a4-4a01-80cc-f2e2085a07cb homology_compute 1.0 2024-04-24 10:18:18 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 25 Released 2449 +9024 794e6ba8-771e-4a7e-b851-f1b48f20806f homology_load 1.0 2024-04-24 10:18:19 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 26 Released 2449 +9025 ae16f773-4395-4908-82f1-d5517b75ace5 homology_ftp 1.0 2024-04-24 10:18:19 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 27 Released 2449 +9026 d84af6e2-313c-4930-bbac-8b74def7c6b7 homology_compute 1.0 2024-04-24 10:18:19 From 58df568e-48c1-4a3b-838b-448540392f9c 433 25 Released 2458 +9027 b6156c27-8aef-4172-b150-6d6b27b0f4c6 homology_load 1.0 2024-04-24 10:18:19 From 58df568e-48c1-4a3b-838b-448540392f9c 433 26 Released 2458 +9028 73f4fa74-7d7c-4bb4-a71e-1194ef1c244a homology_ftp 1.0 2024-04-24 10:18:19 From 58df568e-48c1-4a3b-838b-448540392f9c 433 27 Released 2458 +9029 078ff308-f035-4c75-aa83-66e0697da057 browser_files 1.0 2024-04-24 10:18:19 From 679d6452-799c-4a2f-8906-0db6c639e498 670 33 Released 2518 +9030 9b6fc878-fa10-49b6-bd99-98ade5e0252f regulation_track 1.0 2024-04-24 10:18:19 From 679d6452-799c-4a2f-8906-0db6c639e498 670 34 Released 2518 +9031 852986f9-e63a-44b7-b182-7182d7070e1f regulation_ftp 1.0 2024-04-24 10:18:19 From 
679d6452-799c-4a2f-8906-0db6c639e498 670 35 Released 2518 +9032 7573b939-da2c-4997-8002-9da717ba79d2 genebuild_compute ENS01 2024-04-24 16:07:22 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 8 Submitted 38 +9033 7bb8919c-d9e0-4eca-9a49-7a6d9e311c8d xrefs ENS01 2024-04-24 16:07:22 From 7573b939-da2c-4997-8002-9da717ba79d2 18 13 Submitted 9032 +9034 a6a43d07-4ddd-4935-96f3-137882be6b5f protein_features ENS01 2024-04-24 16:07:22 From 7573b939-da2c-4997-8002-9da717ba79d2 18 14 Submitted 9032 +9035 3286e886-cdde-45e2-a92c-2a5b7a43744b alpha_fold ENS01 2024-04-24 16:07:22 From 7573b939-da2c-4997-8002-9da717ba79d2 18 15 Submitted 9032 +9036 a41c7eb3-8dd9-4449-bef3-8a2798d324c9 genebuild_files ENS01 2024-04-24 16:07:22 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 9 Submitted 38 +9037 384e30bb-1940-475b-a7f1-94c3b5fa6251 blast ENS01 2024-04-24 16:07:22 From a41c7eb3-8dd9-4449-bef3-8a2798d324c9 18 16 Submitted 9036 +9038 b0d8755a-d01b-4910-b84c-0e15ef1293ba ftp_dumps ENS01 2024-04-24 16:07:22 From a41c7eb3-8dd9-4449-bef3-8a2798d324c9 18 17 Submitted 9036 +9039 3666e777-8cb5-420e-8f45-7469253db5f6 genebuild_web ENS01 2024-04-24 16:07:22 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 11 Submitted 38 +9040 282e982f-493b-4f13-a927-3f9e3dc9a8a8 thoas_dumps ENS01 2024-04-24 16:07:22 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 18 Submitted 9039 +9041 f39ac854-157a-48a8-8b81-4345391c59c3 browser_files ENS01 2024-04-24 16:07:22 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 20 Submitted 9039 +9042 6a86ad7d-67d8-4c0b-a504-966225539fc0 checksums ENS01 2024-04-24 16:07:22 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 23 Submitted 9039 +9043 a128c1b9-6f98-40cf-a3ae-321d5e4e1106 genebuild_compute ENS01 2024-04-24 16:07:22 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 8 Processed 184 +9044 65bacf69-42d4-439c-a436-f76208677771 xrefs ENS01 2024-04-24 16:07:22 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 13 Processed 9043 +9045 d4716792-c4af-4ec9-a14c-220f4768ed88 protein_features ENS01 
2024-04-24 16:07:22 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 14 Processed 9043 +9046 17bc6764-fc30-4fe3-8cfc-18d10e5357d3 alpha_fold ENS01 2024-04-24 16:07:22 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 15 Processed 9043 +9047 7e8844b3-733d-4962-a144-70e8cc69a3a6 genebuild_files ENS01 2024-04-24 16:07:22 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 9 Processed 184 +9048 bec4dc62-aac5-4993-98ef-da92da3c3975 blast ENS01 2024-04-24 16:07:22 From 7e8844b3-733d-4962-a144-70e8cc69a3a6 100 16 Processed 9047 +9049 a187630f-56ea-4012-b10f-96d4eee7e280 ftp_dumps ENS01 2024-04-24 16:07:22 From 7e8844b3-733d-4962-a144-70e8cc69a3a6 100 17 Processed 9047 +9050 372c9ef4-5068-491e-bc9d-f173de3779d4 genebuild_web ENS01 2024-04-24 16:07:22 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 11 Processed 184 +9051 1f9b9cf6-af64-49da-9f6a-e91fcfe3748b thoas_dumps ENS01 2024-04-24 16:07:22 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 18 Processed 9050 +9052 14224fd8-39fc-4ce4-955a-9ceb53b7fe17 browser_files ENS01 2024-04-24 16:07:22 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 20 Processed 9050 +9053 a2d8c490-6152-4d44-8cd6-6318be80c6d0 checksums ENS01 2024-04-24 16:07:22 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 23 Processed 9050 +9054 569f3264-1e67-474a-bcca-d1f971bdfb6d genebuild_compute ENS01 2024-04-24 16:07:22 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 8 Processed 338 +9055 2f75afb7-07b9-4f26-914b-447609ae9661 xrefs ENS01 2024-04-24 16:07:22 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 13 Processed 9054 +9056 4030627d-9f93-418b-b162-ef4c4ea7187c protein_features ENS01 2024-04-24 16:07:22 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 14 Processed 9054 +9057 03698609-b0cc-4ebc-ba4e-9c1839c07375 alpha_fold ENS01 2024-04-24 16:07:22 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 15 Processed 9054 +9058 5d221954-3022-434b-8167-4837bcb83cdf genebuild_files ENS01 2024-04-24 16:07:22 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 9 Processed 338 +9059 
0a2ed457-8cb8-4c32-9670-f2d29accf899 blast ENS01 2024-04-24 16:07:22 From 5d221954-3022-434b-8167-4837bcb83cdf 179 16 Processed 9058 +9060 b8aa05bd-9375-49aa-bdd4-2cd81dded467 ftp_dumps ENS01 2024-04-24 16:07:22 From 5d221954-3022-434b-8167-4837bcb83cdf 179 17 Processed 9058 +9061 032c5450-032b-4bd2-91e5-8b00482bb51f genebuild_web ENS01 2024-04-24 16:07:22 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 11 Processed 338 +9062 900b59d2-8ac7-4e41-a588-a4314dadfe9d thoas_dumps ENS01 2024-04-24 16:07:22 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 18 Processed 9061 +9063 c813b3e5-9756-4431-86b9-1d78e3242ffc browser_files ENS01 2024-04-24 16:07:22 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 20 Processed 9061 +9064 6677ee8d-e814-4991-87b1-967d752652f0 checksums ENS01 2024-04-24 16:07:22 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 23 Processed 9061 +9065 b60e4ed3-9260-42fd-bb44-648e2240c0fd homology_compute 1.0 2024-04-24 16:07:22 From f2734f34-36a0-4594-871d-f7f6d317d05a 429 25 Submitted 2454 +9066 ef13256d-516a-475d-9769-8ec0b487c39a homology_compute 1.0 2024-04-24 16:07:22 From f32b7f9a-97fd-41cd-86be-a5fb5becd335 469 25 Processed 2494 +9067 6246ba7c-ae8c-4e66-b696-0aaceb586d75 homology_compute 1.0 2024-04-24 16:07:22 From f93d21ca-9a24-4c31-ae11-b0f8d3deab6d 423 25 Submitted 6849 +9068 23bac8a9-553f-4e00-85f3-2844d6634364 homology_compute 1.0 2024-04-24 16:07:22 From 5b618784-a5ff-46cc-8102-b082ffb6e447 368 25 Submitted 8130 +9069 dc06cef3-40c1-4924-82aa-d95003b033d0 homology_compute 1.0 2024-04-24 16:07:22 From a5bf42be-63c1-4616-9af1-bc03aea92643 443 25 Submitted 8661 9070 bf1f5064-8520-abcd-84e4-449aa6c1c1e2 variation 2.0 2023-11-09 12:49:25.273751 GRCh38 673 3 Faulty \N 9071 bf1f5064-8520-abcd-84e4-449aa6c221e2 variation 2.0 2023-11-09 12:49:25.273751 GRCh38 673 3 Processing \N -9072 99999999-847e-4742-a68b-18c3ece068aa genebuild ENS01 2023-09-22 15:03:02.000000 GCA_021950905.1_ENS01 18 2 Submitted \N -9073 99999999-da2c-4997-8002-9da717ba79d2 genebuild_compute 
ENS01 2024-04-24 16:07:22.000000 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 8 Submitted 9072 -9074 99999999-d9e0-4eca-9a49-7a6d9e311c8d xrefs ENS01 2024-04-24 16:07:22.000000 From 7573b939-da2c-4997-8002-9da717ba79d2 18 13 Submitted 9073 +9072 99999999-847e-4742-a68b-18c3ece068aa genebuild ENS01 2023-09-22 15:03:02 GCA_021950905.1_ENS01 18 2 Submitted \N +9073 99999999-da2c-4997-8002-9da717ba79d2 genebuild_compute ENS01 2024-04-24 16:07:22 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 8 Submitted 9072 +9074 99999999-d9e0-4eca-9a49-7a6d9e311c8d xrefs ENS01 2024-04-24 16:07:22 From 7573b939-da2c-4997-8002-9da717ba79d2 18 13 Submitted 9073 diff --git a/src/tests/databases/ensembl_genome_metadata/dataset_attribute.txt b/src/tests/databases/ensembl_genome_metadata/dataset_attribute.txt index acce8478..45c652e7 100644 --- a/src/tests/databases/ensembl_genome_metadata/dataset_attribute.txt +++ b/src/tests/databases/ensembl_genome_metadata/dataset_attribute.txt @@ -533,7 +533,6 @@ 193329 5.42 31 172 193284 2014-07 33 172 193281 2023-03 34 172 -193282 ensembl 169 172 193292 toplevel 35 172 193308 2473539 36 172 193286 full_genebuild 37 172 @@ -576,6 +575,7 @@ 193279 39 77 172 193339 14932.57 78 172 193288 19-12-2022 82 172 +193282 ensembl 169 172 193384 224817 170 172 199440 GCA_018505865.1 1 177 199454 481 3 177 @@ -1116,6 +1116,7 @@ 205448 https://wormbase.org/ 85 406 205446 wormbase 169 406 205449 25837 170 406 +211872 EXT01 197 406 187207 1165.04 21 888 187213 7.80 22 888 187216 7.80 23 888 @@ -1230,6 +1231,7 @@ 187331 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 890 187329 ensembl 169 890 187332 181221 170 890 +211871 ENS01 197 890 187371 1164.48 21 892 187377 7.80 22 892 187380 7.80 23 892 @@ -2242,5 +2244,3 @@ 211577 23/06/09 183 9030 211498 23/06/06 182 9031 211497 23/06/06 183 9031 -211871 ENS01 197 890 -211872 EXT01 197 406 diff --git a/src/tests/databases/ensembl_genome_metadata/dataset_source.txt 
b/src/tests/databases/ensembl_genome_metadata/dataset_source.txt index d481a244..50b12532 100644 --- a/src/tests/databases/ensembl_genome_metadata/dataset_source.txt +++ b/src/tests/databases/ensembl_genome_metadata/dataset_source.txt @@ -1,53 +1,53 @@ -1 core bacteria_0_collection_core_57_110_1 -4 core homo_sapiens_gca018473315v1_core_110_1 -5 core homo_sapiens_gca018469415v1_core_110_1 -6 core homo_sapiens_gca018469875v1_core_110_1 -7 core homo_sapiens_gca018505825v1_core_110_1 -9 core homo_sapiens_gca018852615v1_core_110_1 -14 core plasmodium_falciparum_core_57_110_1 -18 core homo_sapiens_gca021950905v1_core_110_1 -40 core homo_sapiens_37_core_110_37 -79 core triticum_aestivum_core_57_110_4 -92 core homo_sapiens_core_110_38 -97 core homo_sapiens_gca018505865v1_core_110_1 -100 core homo_sapiens_gca018852605v1_core_110_1 -107 core homo_sapiens_gca018469925v1_core_110_1 -135 core homo_sapiens_gca018469425v1_core_110_1 -179 core homo_sapiens_gca021951015v1_core_110_1 -185 core homo_sapiens_gca018473295v1_core_110_1 -214 core saccharomyces_cerevisiae_core_57_110_4 -217 core caenorhabditis_elegans_core_57_110_282 -251 compara homo_sapiens_gca018473315v1_compara_110 -266 compara homo_sapiens_37_compara_110 -284 compara homo_sapiens_gca018469425v1_compara_110 -294 compara homo_sapiens_gca018469925v1_compara_110 -313 compara homo_sapiens_gca018505825v1_compara_110 -323 compara plasmodium_falciparum_compara_110 -332 compara saccharomyces_cerevisiae_compara_110 -359 compara triticum_aestivum_compara_110 -368 compara homo_sapiens_gca018852605v1_compara_110 -369 compara caenorhabditis_elegans_compara_110 -383 compara homo_sapiens_gca018469415v1_compara_110 -387 compara homo_sapiens_gca018469875v1_compara_110 -423 compara homo_sapiens_gca018505865v1_compara_110 -424 compara homo_sapiens_compara_110 -429 compara homo_sapiens_gca021950905v1_compara_110 -433 compara escherichia_coli_str_k_12_substr_mg1655_gca_000005845_compara_110 -443 compara 
homo_sapiens_gca018852615v1_compara_110 -457 compara homo_sapiens_gca018473295v1_compara_110 -469 compara homo_sapiens_gca021951015v1_compara_110 -565 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/114e90e0-aa35-4af6-9204-267c988328c3/variation.vcf.gz -571 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/ef282def-9a17-4b35-a344-0f0c559e54ab/variation.vcf.gz -576 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/9caa2cae-d1c8-4cfc-9ffd-2e13bc3e95b1/variation.vcf.gz -592 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/75b7ac15-6373-4ad5-9fb7-23813a5355a4/variation.vcf.gz -595 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/1220d766-6fcb-4b80-9106-121f238c0b3d/variation.vcf.gz -597 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/8364a820-5485-42d7-a648-1a5eeb858319/variation.vcf.gz -608 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/2020e8d5-4d87-47af-be78-0b15e48970a7/variation.vcf.gz -610 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/65d4f21f-695a-4ed0-be67-5732a551fea4/variation.vcf.gz -644 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a733574a-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz -653 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a73356e1-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz -660 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a73357ab-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz -670 regulation /nfs/production/flicek/ensembl/regulation/plins/110-mvp-handover/homo_sapiens/GRCh38/homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.bb -673 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a7335667-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz -674 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/3704ceb1-948d-11ec-a39d-005056b38ce3/variation.vcf.gz -4352 compara escherichia_coli_str_k_12_substr_mg1655_compara_110 
+1 core bacteria_0_collection_core_57_110_1 \N +4 core homo_sapiens_gca018473315v1_core_110_1 \N +5 core homo_sapiens_gca018469415v1_core_110_1 \N +6 core homo_sapiens_gca018469875v1_core_110_1 \N +7 core homo_sapiens_gca018505825v1_core_110_1 \N +9 core homo_sapiens_gca018852615v1_core_110_1 \N +14 core plasmodium_falciparum_core_57_110_1 \N +18 core homo_sapiens_gca021950905v1_core_110_1 \N +40 core homo_sapiens_37_core_110_37 \N +79 core triticum_aestivum_core_57_110_4 \N +92 core homo_sapiens_core_110_38 \N +97 core homo_sapiens_gca018505865v1_core_110_1 \N +100 core homo_sapiens_gca018852605v1_core_110_1 \N +107 core homo_sapiens_gca018469925v1_core_110_1 \N +135 core homo_sapiens_gca018469425v1_core_110_1 \N +179 core homo_sapiens_gca021951015v1_core_110_1 \N +185 core homo_sapiens_gca018473295v1_core_110_1 \N +214 core saccharomyces_cerevisiae_core_57_110_4 \N +217 core caenorhabditis_elegans_core_57_110_282 \N +251 compara homo_sapiens_gca018473315v1_compara_110 \N +266 compara homo_sapiens_37_compara_110 \N +284 compara homo_sapiens_gca018469425v1_compara_110 \N +294 compara homo_sapiens_gca018469925v1_compara_110 \N +313 compara homo_sapiens_gca018505825v1_compara_110 \N +323 compara plasmodium_falciparum_compara_110 \N +332 compara saccharomyces_cerevisiae_compara_110 \N +359 compara triticum_aestivum_compara_110 \N +368 compara homo_sapiens_gca018852605v1_compara_110 \N +369 compara caenorhabditis_elegans_compara_110 \N +383 compara homo_sapiens_gca018469415v1_compara_110 \N +387 compara homo_sapiens_gca018469875v1_compara_110 \N +423 compara homo_sapiens_gca018505865v1_compara_110 \N +424 compara homo_sapiens_compara_110 \N +429 compara homo_sapiens_gca021950905v1_compara_110 \N +433 compara escherichia_coli_str_k_12_substr_mg1655_gca_000005845_compara_110 \N +443 compara homo_sapiens_gca018852615v1_compara_110 \N +457 compara homo_sapiens_gca018473295v1_compara_110 \N +469 compara homo_sapiens_gca021951015v1_compara_110 \N +565 vcf 
/nfs/production/flicek/ensembl/variation/new_website/v110/api/114e90e0-aa35-4af6-9204-267c988328c3/variation.vcf.gz \N +571 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/ef282def-9a17-4b35-a344-0f0c559e54ab/variation.vcf.gz \N +576 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/9caa2cae-d1c8-4cfc-9ffd-2e13bc3e95b1/variation.vcf.gz \N +592 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/75b7ac15-6373-4ad5-9fb7-23813a5355a4/variation.vcf.gz \N +595 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/1220d766-6fcb-4b80-9106-121f238c0b3d/variation.vcf.gz \N +597 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/8364a820-5485-42d7-a648-1a5eeb858319/variation.vcf.gz \N +608 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/2020e8d5-4d87-47af-be78-0b15e48970a7/variation.vcf.gz \N +610 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/65d4f21f-695a-4ed0-be67-5732a551fea4/variation.vcf.gz \N +644 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a733574a-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz \N +653 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a73356e1-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz \N +660 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a73357ab-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz \N +670 regulation /nfs/production/flicek/ensembl/regulation/plins/110-mvp-handover/homo_sapiens/GRCh38/homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.bb \N +673 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a7335667-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz \N +674 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/3704ceb1-948d-11ec-a39d-005056b38ce3/variation.vcf.gz \N +4352 compara escherichia_coli_str_k_12_substr_mg1655_compara_110 \N diff --git a/src/tests/databases/ensembl_genome_metadata/dataset_type.txt 
b/src/tests/databases/ensembl_genome_metadata/dataset_type.txt index 010ea40a..58817bb7 100644 --- a/src/tests/databases/ensembl_genome_metadata/dataset_type.txt +++ b/src/tests/databases/ensembl_genome_metadata/dataset_type.txt @@ -1,34 +1,34 @@ -1 assembly Genomic assembly assembly Compilation of sequences for a genome \N \N \N \N -2 genebuild Genomic Build genebuild_annotation Genomic annotations for an assembly \N \N \N \N -3 variation Variation Annotations variation_annotation Variation annotation set \N \N 2 \N -4 evidence Variation Evidence production_process Variation evidence annotation \N 3 \N \N -5 short_variant Short variants production_process Short variant data \N 3 \N \N -6 homologies Comparative homologies compara_annotation Comparative Genomics annotation \N \N 2 \N -7 regulatory_features Regulatory Annotation regulation_annotation Regulatory annotation for an assembly \N \N 2 \N -8 genebuild_compute External References production_process Xref genome annotation for Genebuild \N 2 \N \N -9 genebuild_files Files dumps production_process File Dumps, either internal or for public consumption \N 2 8 \N -11 genebuild_web Web Geneset content production_process Web Geneset related content \N 2 \N \N -12 genebuild_prep Genebuild preparation production_preparation Web Content for Geneset publication \N 2 8,9,11,12 \N -13 xrefs External References production_process External annotations linking \N 8 \N \N -14 protein_features Protein Features annotations production_process Proteins annotation \N 8 13 \N -15 alpha_fold AlphaFold computation production_process Compute Protein structure with Alphafold \N 8 13 \N -16 blast Blast tools production_process Blast Indexes files \N 9 8 \N -17 ftp_dumps Public FTP files production_process Public FTP flat files geneset dumps \N 9 8 \N -18 thoas_dumps Thoas load flat files production_process Dump flat file to load onto THOAS \N 11 \N \N -19 thoas_load Thoas MongoDB Load production_preparation Load dumped files onto THOAS 
\N 12 18,23 \N -20 genebuild_browser_files Genome Browser BB Geneset files production_process Production BigBed for Genome Browser \N 11 \N \N -21 genebuild_track Geneset Tracks API production_preparation Register Geneset Track API BigBed files \N 12 20 \N -23 checksums Sequences Checksums production_process Compute core sequence checksums and update metadata \N 11 \N \N -24 refget_load Refget Loading production_preparation Load sequences and their checksum onto Refget app \N 12 22 \N -25 homology_compute Homology annotation production_process Compute Genome homology analysis \N 6 \N \N -26 homology_load Homology dataload production_preparation Load homology data onto Compara Service (MongoDB) \N 6 25 \N -27 homology_ftp Homology tsv public files production_preparation Dump and sync public TSV homology files \N 6 25 \N -28 vep VEP filesets variation_annotation VCF annotation file for geneset \N \N \N \N -29 variation_ftp Public Variation files (vcf) production_preparation VCF files for public FTP \N 3 \N \N -31 variation_browser_files Variation Browser files production_process Variation track browser file \N 3 \N \N -32 variation_track Variation Track production_preparation Variation Track API \N 3 31 \N -33 regulation_browser_files Regulation Browser files production_process Regulation track browser file \N 7 \N \N -34 regulation_track Regulation Track production_preparation Regulation Track API \N 7 33 \N -35 regulation_ftp Regulation Public files production_preparation Regulation public files \N 7 33 \N -37 web_genesearch GeneSearch Index production_publication Gene search indexes provisioning \N \N 36 \N -38 web_genomediscovery Genome Search indexes loading to EBI search production_publication Load dumped data from genebuild_web onto EBI Search engine (SpecieSelector) \N \N 37 \N +1 assembly Genomic assembly assembly Compilation of sequences for a genome \N +2 genebuild Genomic Build genebuild_annotation Genomic annotations for an assembly \N +3 variation 
Variation Annotations variation_annotation Variation annotation set \N +4 evidence Variation Evidence production_process Variation evidence annotation 3 +5 short_variant Short variants production_process Short variant data 3 +6 homologies Comparative homologies compara_annotation Comparative Genomics annotation \N +7 regulatory_features Regulatory Annotation regulation_annotation Regulatory annotation for an assembly \N +8 genebuild_compute External References production_process Xref genome annotation for Genebuild 2 +9 genebuild_files Files dumps production_process File Dumps, either internal or for public consumption 2 +11 genebuild_web Web Geneset content production_process Web Geneset related content 2 +12 genebuild_prep Genebuild preparation production_preparation Web Content for Geneset publication 2 +13 xrefs External References production_process External annotations linking 8 +14 protein_features Protein Features annotations production_process Proteins annotation 8 +15 alpha_fold AlphaFold computation production_process Compute Protein structure with Alphafold 8 +16 blast Blast tools production_process Blast Indexes files 9 +17 ftp_dumps Public FTP files production_process Public FTP flat files geneset dumps 9 +18 thoas_dumps Thoas load flat files production_process Dump flat file to load onto THOAS 11 +19 thoas_load Thoas MongoDB Load production_preparation Load dumped files onto THOAS 12 +20 genebuild_browser_files Genome Browser BB Geneset files production_process Production BigBed for Genome Browser 11 +21 genebuild_track Geneset Tracks API production_preparation Register Geneset Track API BigBed files 12 +23 checksums Sequences Checksums production_process Compute core sequence checksums and update metadata 11 +24 refget_load Refget Loading production_preparation Load sequences and their checksum onto Refget app 12 +25 homology_compute Homology annotation production_process Compute Genome homology analysis 6 +26 homology_load Homology dataload 
production_preparation Load homology data onto Compara Service (MongoDB) 6 +27 homology_ftp Homology tsv public files production_preparation Dump and sync public TSV homology files 6 +28 vep VEP filesets variation_annotation VCF annotation file for geneset \N +29 variation_ftp Public Variation files (vcf) production_preparation VCF files for public FTP 3 +31 variation_browser_files Variation Browser files production_process Variation track browser file 3 +32 variation_track Variation Track production_preparation Variation Track API 3 +33 regulation_browser_files Regulation Browser files production_process Regulation track browser file 7 +34 regulation_track Regulation Track production_preparation Regulation Track API 7 +35 regulation_ftp Regulation Public files production_preparation Regulation public files 7 +37 web_genesearch GeneSearch Index production_publication Gene search indexes provisioning \N +38 web_genomediscovery Genome Search indexes loading to EBI search production_publication Load dumped data from genebuild_web onto EBI Search engine (SpecieSelector) \N diff --git a/src/tests/databases/ensembl_genome_metadata/ensembl_release.txt b/src/tests/databases/ensembl_genome_metadata/ensembl_release.txt index 769d578f..67107deb 100644 --- a/src/tests/databases/ensembl_genome_metadata/ensembl_release.txt +++ b/src/tests/databases/ensembl_genome_metadata/ensembl_release.txt @@ -1,6 +1,6 @@ -1 110.1 2023-10-18 MVP Beta-1 1 partial 1 Released 1 -2 110.2 \N MVP Beta-2 0 partial 1 Prepared 2 -3 110.3 \N MVP Beta-3 0 partial 1 Preparing 3 -4 112.0 \N MVP Rel-1 0 partial 1 Planned 4 -5 108.0 2023-06-15 First Beta 0 partial 1 Released 5 -6 114.0 2025-06-15 dataset_test 0 partial 1 Preparing 6 +1 110.1 2020-10-18 2020-10-18 1 partial 1 Released 1 +2 110.2 2021-10-18 2021-10-18 0 partial 1 Prepared 2 +3 110.3 2022-10-18 2022-10-18 0 partial 1 Preparing 3 +4 112.0 2022-11-18 2022-11-18 0 partial 1 Planned 4 +5 108.0 2023-06-15 2023-06-15 0 partial 1 Released 5 +6 114.0 
2025-06-15 2025-06-15 0 partial 1 Preparing 6 diff --git a/src/tests/databases/ensembl_genome_metadata/genome.txt b/src/tests/databases/ensembl_genome_metadata/genome.txt index b81281f4..86e09709 100644 --- a/src/tests/databases/ensembl_genome_metadata/genome.txt +++ b/src/tests/databases/ensembl_genome_metadata/genome.txt @@ -1,20 +1,20 @@ -1 a73351f7-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:01:44.000000 1 1 0 escherichia_coli_str_k_12_substr_mg1655 EXT01 2018-09 -4 9caa2cae-d1c8-4cfc-9ffd-2e13bc3e95b1 2023-09-22 15:02:00.000000 4 4 0 homo_sapiens_gca018473315v1 ENS01 2022-08 -5 2020e8d5-4d87-47af-be78-0b15e48970a7 2023-09-22 15:02:01.000000 5 5 0 homo_sapiens_gca018469415v1 ENS01 2022-07 -6 75b7ac15-6373-4ad5-9fb7-23813a5355a4 2023-09-22 15:02:02.000000 6 6 0 homo_sapiens_gca018469875v1 ENS01 2022-07 -7 1220d766-6fcb-4b80-9106-121f238c0b3d 2023-09-22 15:02:04.000000 7 7 0 homo_sapiens_gca018505825v1 ENS01 2022-07 -9 c3dcaca8-aaee-479f-aad8-c7a5e17b7e10 2023-09-22 15:02:11.000000 9 9 0 homo_sapiens_gca018852615v1 ENS01 2022-07 -12 a73356e1-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:03:01.000000 15 14 0 plasmodium_falciparum EXT01 2017-10 -19 56d9b469-097f-48a7-8501-c8416bcbcdfb 2023-09-22 15:03:02.000000 18 9 0 homo_sapiens_gca021950905v1 ENS01 2022-07 -31 3704ceb1-948d-11ec-a39d-005056b38ce3 2023-09-22 15:03:22.000000 40 83 0 homo_sapiens_37 GENCODE19 2013-09 -74 a73357ab-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:04:29.000000 79 72 0 triticum_aestivum EXT01 2018-04 -86 a7335667-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:04:45.000000 92 83 0 homo_sapiens GENCODE44 2023-03 -89 8364a820-5485-42d7-a648-1a5eeb858319 2023-09-22 15:04:50.000000 97 7 0 homo_sapiens_gca018505865v1 ENS01 2022-07 -92 63b4ffbf-0147-4aa7-b0af-7575bb822740 2023-09-22 15:04:53.000000 100 9 0 homo_sapiens_gca018852605v1 ENS01 2022-07 -99 ef282def-9a17-4b35-a344-0f0c559e54ab 2023-09-22 15:04:56.000000 107 6 0 homo_sapiens_gca018469925v1 ENS01 2022-07 -125 114e90e0-aa35-4af6-9204-267c988328c3 
2023-09-22 15:05:37.000000 135 5 0 homo_sapiens_gca018469425v1 ENS01 2022-07 -169 af073c3e-d087-46b0-bb62-310e89982450 2023-09-22 15:06:39.000000 180 9 0 homo_sapiens_gca021951015v1 ENS01 2022-07 -174 65d4f21f-695a-4ed0-be67-5732a551fea4 2023-09-22 15:06:43.000000 186 4 0 homo_sapiens_gca018473295v1 ENS01 2022-08 -201 a733574a-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:06:55.000000 216 172 0 saccharomyces_cerevisiae EXT01 2018-10 -203 a733550b-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:06:58.000000 219 175 0 caenorhabditis_elegans EXT01 2014-10 -204 99999999-695a-4ed0-be67-5732a551fea4 2023-09-22 15:06:43.000000 186 4 0 homo_sapiens_gca018473295v1 ENS09 2022-08 +1 a73351f7-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:01:44 1 1 escherichia_coli_str_k_12_substr_mg1655 EXT01 2018-09 community 0 \N asm584v2 +4 9caa2cae-d1c8-4cfc-9ffd-2e13bc3e95b1 2023-09-22 15:02:00 4 4 homo_sapiens_gca018473315v1 ENS01 2022-08 ensembl 0 \N \N +5 2020e8d5-4d87-47af-be78-0b15e48970a7 2023-09-22 15:02:01 5 5 homo_sapiens_gca018469415v1 ENS01 2022-07 ensembl 0 \N \N +6 75b7ac15-6373-4ad5-9fb7-23813a5355a4 2023-09-22 15:02:02 6 6 homo_sapiens_gca018469875v1 ENS01 2022-07 ensembl 0 \N \N +7 1220d766-6fcb-4b80-9106-121f238c0b3d 2023-09-22 15:02:04 7 7 homo_sapiens_gca018505825v1 ENS01 2022-07 ensembl 0 \N \N +9 c3dcaca8-aaee-479f-aad8-c7a5e17b7e10 2023-09-22 15:02:11 9 9 homo_sapiens_gca018852615v1 ENS01 2022-07 ensembl 0 \N \N +12 a73356e1-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:03:01 15 14 plasmodium_falciparum EXT01 2017-10 community 0 \N asm276v2 +19 56d9b469-097f-48a7-8501-c8416bcbcdfb 2023-09-22 15:03:02 18 9 homo_sapiens_gca021950905v1 ENS01 2022-07 ensembl 0 \N \N +31 3704ceb1-948d-11ec-a39d-005056b38ce3 2023-09-22 15:03:22 40 83 homo_sapiens_37 GENCODE19 2013-09 ensembl 0 \N grch37 +74 a73357ab-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:04:29 79 72 triticum_aestivum EXT01 2018-04 community 0 \N iwgsc +86 a7335667-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:04:45 92 83 homo_sapiens 
GENCODE44 2023-03 ensembl 0 \N grch38 +89 8364a820-5485-42d7-a648-1a5eeb858319 2023-09-22 15:04:50 97 7 homo_sapiens_gca018505865v1 ENS01 2022-07 ensembl 0 \N \N +92 63b4ffbf-0147-4aa7-b0af-7575bb822740 2023-09-22 15:04:53 100 9 homo_sapiens_gca018852605v1 ENS01 2022-07 ensembl 0 \N \N +99 ef282def-9a17-4b35-a344-0f0c559e54ab 2023-09-22 15:04:56 107 6 homo_sapiens_gca018469925v1 ENS01 2022-07 ensembl 0 \N \N +125 114e90e0-aa35-4af6-9204-267c988328c3 2023-09-22 15:05:37 135 5 homo_sapiens_gca018469425v1 ENS01 2022-07 ensembl 0 \N \N +169 af073c3e-d087-46b0-bb62-310e89982450 2023-09-22 15:06:39 180 9 homo_sapiens_gca021951015v1 ENS01 2022-07 ensembl 0 \N \N +174 65d4f21f-695a-4ed0-be67-5732a551fea4 2023-09-22 15:06:43 186 4 homo_sapiens_gca018473295v1 ENS01 2022-08 ensembl 0 \N \N +201 a733574a-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:06:55 216 172 saccharomyces_cerevisiae EXT01 2018-10 community 0 \N r64-1-1 +203 a733550b-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:06:58 219 175 caenorhabditis_elegans EXT01 2014-10 wormbase 0 \N wbcel235 +204 99999999-695a-4ed0-be67-5732a551fea4 2023-09-22 15:06:43 186 4 homo_sapiens_gca018473295v1 ENS09 2022-08 ensembl 0 \N \N diff --git a/src/tests/databases/ensembl_genome_metadata/genome_dataset.txt b/src/tests/databases/ensembl_genome_metadata/genome_dataset.txt index f7541c3f..d0e4e0fb 100644 --- a/src/tests/databases/ensembl_genome_metadata/genome_dataset.txt +++ b/src/tests/databases/ensembl_genome_metadata/genome_dataset.txt @@ -496,4 +496,4 @@ 9016 0 9071 86 \N 9017 0 9072 204 6 9018 0 9073 204 6 -9019 0 9074 204 6 \ No newline at end of file +9019 0 9074 204 6 diff --git a/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_name.txt b/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_name.txt index 5555cf0c..dd6accd1 100644 --- a/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_name.txt +++ b/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_name.txt @@ -1,11 +1,11 @@ 562 Achromobacter sp. 
ATCC 35328 includes 562 ATCC 11775 type material -562 "Bacillus coli" Migula 1895 authority +562 Bacillus coli Migula 1895 authority 562 Bacillus coli synonym 562 bacterium 10a includes -562 "Bacterium coli commune" Escherich 1885 authority +562 Bacterium coli commune Escherich 1885 authority 562 Bacterium coli commune synonym -562 "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 authority +562 Bacterium coli (Migula 1895) Lehmann and Neumann 1896 authority 562 Bacterium coli synonym 562 bacterium E3 includes 562 CCUG 24 type material diff --git a/src/tests/databases/ensembl_genome_metadata/table.sql b/src/tests/databases/ensembl_genome_metadata/table.sql index 61540e45..a59fa934 100644 --- a/src/tests/databases/ensembl_genome_metadata/table.sql +++ b/src/tests/databases/ensembl_genome_metadata/table.sql @@ -1,262 +1,228 @@ -CREATE TABLE assembly +CREATE TABLE `assembly` ( - assembly_id int auto_increment primary key, - ucsc_name varchar(16) null, - accession varchar(16) not null, - level varchar(32) not null, - name varchar(128) not null, - accession_body varchar(32) null, - assembly_default varchar(128) null, - tol_id varchar(32) null, - created datetime(6) null, - ensembl_name varchar(255) null, - alt_accession varchar(16) null, - assembly_uuid char(36) not null, - is_reference tinyint(1) not null, - url_name varchar(128) null, - constraint accession - unique (accession), - constraint assembly_uuid - unique (assembly_uuid), - constraint ensembl_name - unique (ensembl_name) -); + `assembly_id` int(11) NOT NULL AUTO_INCREMENT, + `ucsc_name` varchar(16) DEFAULT NULL, + `accession` varchar(16) NOT NULL, + `level` varchar(32) NOT NULL, + `name` varchar(128) NOT NULL, + `accession_body` varchar(32) DEFAULT NULL, + `assembly_default` varchar(128) DEFAULT NULL, + `tol_id` varchar(32) DEFAULT NULL, + `created` datetime(6) DEFAULT NULL, + `ensembl_name` varchar(255) DEFAULT NULL, + `assembly_uuid` char(36) NOT NULL, + `is_reference` tinyint(1) NOT NULL, + PRIMARY 
KEY (`assembly_id`), + UNIQUE KEY `accession` (`accession`), + UNIQUE KEY `assembly_uuid` (`assembly_uuid`), + UNIQUE KEY `ensembl_name` (`ensembl_name`) +) ENGINE=InnoDB AUTO_INCREMENT=220 DEFAULT CHARSET=latin1; -CREATE TABLE assembly_sequence +CREATE TABLE `assembly_sequence` ( - assembly_sequence_id int auto_increment primary key, - name varchar(128) null, - accession varchar(128) not null, - chromosomal tinyint(1) not null, - length int not null, - sequence_location varchar(10) null, - md5 varchar(32) null, - assembly_id int not null, - chromosome_rank int null, - sha512t24u varchar(128) null, - is_circular tinyint(1) not null, - type varchar(26) not null, - constraint assembly_sequence_assembly_id_accession_5f3e5119_uniq - unique (assembly_id, accession), - constraint assembly_sequence_assembly_id_2a84ddcb_fk_assembly_assembly_id - foreign key (assembly_id) references assembly (assembly_id) - on delete cascade -); + `assembly_sequence_id` int(11) NOT NULL AUTO_INCREMENT, + `name` varchar(128) DEFAULT NULL, + `accession` varchar(128) NOT NULL, + `chromosomal` tinyint(1) NOT NULL DEFAULT '0', + `length` int(11) NOT NULL, + `sequence_location` varchar(10) DEFAULT NULL, + `md5` varchar(32) DEFAULT NULL, + `assembly_id` int(11) NOT NULL, + `chromosome_rank` int(11) DEFAULT NULL, + `sha512t24u` varchar(128) DEFAULT NULL, + `is_circular` tinyint(1) NOT NULL DEFAULT '0', + `type` varchar(26) NOT NULL, + `additional` tinyint(1) NOT NULL DEFAULT '0', + `source` varchar(120) DEFAULT NULL, + PRIMARY KEY (`assembly_sequence_id`), + UNIQUE KEY `assembly_sequence_assembly_id_accession_5f3e5119_uniq` (`assembly_id`,`accession`), + KEY `assembly_sequence_assembly_id_chromosomal_index` (`assembly_id`,`chromosomal`), + KEY `assembly_sequence_name_assembly_id_index` (`name`,`assembly_id`), + CONSTRAINT `assembly_sequence_assembly_id_2a84ddcb_fk_assembly_assembly_id` FOREIGN KEY (`assembly_id`) REFERENCES `assembly` (`assembly_id`) ON DELETE CASCADE +) ENGINE=InnoDB 
AUTO_INCREMENT=3785696 DEFAULT CHARSET=latin1; -create index assembly_sequence_assembly_id_chromosomal_index - on assembly_sequence (assembly_id, chromosomal); - -create index assembly_sequence_name_assembly_id_index - on assembly_sequence (name, assembly_id); - -CREATE TABLE attribute +CREATE TABLE `attribute` ( - attribute_id int auto_increment primary key, - name varchar(128) not null, - label varchar(128) not null, - description varchar(255) null, - type enum ('integer', 'float', 'percent', 'string', 'bp') default 'string' null, - required tinyint(1) DEFAULT '0' not null, - constraint name - unique (name), - constraint name_2 - unique (name), - constraint name_3 - unique (name) -); + `attribute_id` int(11) NOT NULL AUTO_INCREMENT, + `name` varchar(128) NOT NULL, + `label` varchar(128) NOT NULL, + `description` varchar(255) DEFAULT NULL, + `type` enum('string','integer','bp','percent','float') NOT NULL, + `required` tinyint(1) NOT NULL DEFAULT '0', + PRIMARY KEY (`attribute_id`), + UNIQUE KEY `name` (`name`), + UNIQUE KEY `name_2` (`name`), + UNIQUE KEY `name_3` (`name`) +) ENGINE=InnoDB AUTO_INCREMENT=198 DEFAULT CHARSET=latin1; -CREATE TABLE dataset_source +CREATE TABLE `dataset` ( - dataset_source_id int auto_increment primary key, - type varchar(32) not null, - name varchar(255) not null, - constraint name - unique (name) -); + `dataset_id` int(11) NOT NULL AUTO_INCREMENT, + `dataset_uuid` char(36) NOT NULL, + `name` varchar(128) NOT NULL, + `version` varchar(128) DEFAULT NULL, + `created` datetime(6) NOT NULL, + `label` varchar(128) NOT NULL, + `dataset_source_id` int(11) NOT NULL, + `dataset_type_id` int(11) NOT NULL, + `status` enum('Submitted','Processing','Processed','Released','Faulty','Suppressed') NOT NULL DEFAULT 'Submitted', + `parent_id` int(11) DEFAULT NULL, + PRIMARY KEY (`dataset_id`), + KEY `dataset_dataset_source_id_fd96f115_fk_dataset_s` (`dataset_source_id`), + KEY `dataset_dataset_type_id_47284562_fk_dataset_type_dataset_type_id` 
(`dataset_type_id`), + KEY `dataset_parent_id_fk` (`parent_id`), + CONSTRAINT `dataset_dataset_source_id_fd96f115_fk_dataset_s` FOREIGN KEY (`dataset_source_id`) REFERENCES `dataset_source` (`dataset_source_id`) ON DELETE CASCADE, + CONSTRAINT `dataset_dataset_type_id_47284562_fk_dataset_type_dataset_type_id` FOREIGN KEY (`dataset_type_id`) REFERENCES `dataset_type` (`dataset_type_id`), + CONSTRAINT `dataset_parent_id_fk` FOREIGN KEY (`parent_id`) REFERENCES `dataset` (`dataset_id`) ON DELETE CASCADE +) ENGINE=InnoDB AUTO_INCREMENT=9075 DEFAULT CHARSET=latin1; -CREATE TABLE dataset_type +CREATE TABLE `dataset_attribute` ( - dataset_type_id int auto_increment primary key, - name varchar(32) not null, - label varchar(128) not null, - topic varchar(32) not null, - description varchar(255) null, - details_uri varchar(255) null, - parent_id int default null, - depends_on varchar(128) null, - filter_on longtext null, - constraint dataset_type_parent_id_fk - foreign key (parent_id) references dataset_type (dataset_type_id) - on delete set null -); + `dataset_attribute_id` int(11) NOT NULL AUTO_INCREMENT, + `value` varchar(128) DEFAULT NULL, + `attribute_id` int(11) NOT NULL, + `dataset_id` int(11) NOT NULL, + PRIMARY KEY (`dataset_attribute_id`), + UNIQUE KEY `dataset_attribute_dataset_id_attribute_id_value_4d1ddfaf_uniq` (`dataset_id`,`attribute_id`,`value`), + KEY `dataset_attribute_attribute_id_55c51407_fk_attribute` (`attribute_id`), + KEY `dataset_attribute_dataset_id_2e2afe19` (`dataset_id`), + CONSTRAINT `dataset_attribute_attribute_id_55c51407_fk_attribute` FOREIGN KEY (`attribute_id`) REFERENCES `attribute` (`attribute_id`) ON DELETE CASCADE, + CONSTRAINT `dataset_attribute_dataset_id_2e2afe19_fk_dataset_dataset_id` FOREIGN KEY (`dataset_id`) REFERENCES `dataset` (`dataset_id`) ON DELETE CASCADE +) ENGINE=InnoDB AUTO_INCREMENT=211873 DEFAULT CHARSET=latin1; -CREATE TABLE dataset +CREATE TABLE `dataset_source` ( - dataset_id int auto_increment primary key, - 
dataset_uuid char(36) not null, - name varchar(128) not null, - version varchar(128) null, - created datetime(6) not null, - label varchar(128) not null, - dataset_source_id int not null, - dataset_type_id int not null, - status varchar(12) not null, - parent_id int default null, - constraint dataset_dataset_source_id_fd96f115_fk_dataset_s - foreign key (dataset_source_id) references dataset_source (dataset_source_id) - on delete cascade, - constraint dataset_dataset_type_id_47284562_fk_dataset_type_dataset_type_id - foreign key (dataset_type_id) references dataset_type (dataset_type_id), - constraint dataset_parent_id_fk - foreign key (parent_id) references dataset (dataset_id) - on delete cascade -); + `dataset_source_id` int(11) NOT NULL AUTO_INCREMENT, + `type` varchar(32) NOT NULL, + `name` varchar(255) NOT NULL, + `location` varchar(120) DEFAULT NULL, + PRIMARY KEY (`dataset_source_id`), + UNIQUE KEY `name` (`name`) +) ENGINE=InnoDB AUTO_INCREMENT=4353 DEFAULT CHARSET=latin1; -CREATE TABLE dataset_attribute +CREATE TABLE `dataset_type` ( - dataset_attribute_id int auto_increment primary key, - value varchar(128) null, - attribute_id int not null, - dataset_id int not null, - constraint dataset_attribute_dataset_id_attribute_id_value_4d1ddfaf_uniq - unique (dataset_id, attribute_id, value), - constraint dataset_attribute_attribute_id_55c51407_fk_attribute - foreign key (attribute_id) references attribute (attribute_id) - on delete cascade, - constraint dataset_attribute_dataset_id_2e2afe19_fk_dataset_dataset_id - foreign key (dataset_id) references dataset (dataset_id) - on delete cascade -); + `dataset_type_id` int(11) NOT NULL AUTO_INCREMENT, + `name` varchar(32) NOT NULL, + `label` varchar(128) NOT NULL, + `topic` varchar(32) NOT NULL, + `description` varchar(255) DEFAULT NULL, + `parent_id` int(11) DEFAULT NULL, + PRIMARY KEY (`dataset_type_id`), + UNIQUE KEY `name` (`name`), + KEY `dataset_type_parent_id_fk` (`parent_id`), + CONSTRAINT 
`dataset_type_parent_id_fk` FOREIGN KEY (`parent_id`) REFERENCES `dataset_type` (`dataset_type_id`) ON DELETE SET NULL +) ENGINE=InnoDB AUTO_INCREMENT=39 DEFAULT CHARSET=latin1; -create index dataset_attribute_dataset_id_2e2afe19 - on dataset_attribute (dataset_id); - -CREATE TABLE ensembl_site +CREATE TABLE `ensembl_release` ( - site_id int auto_increment primary key, - name varchar(64) not null, - label varchar(64) not null, - uri varchar(64) not null -); + `release_id` int(11) NOT NULL AUTO_INCREMENT, + `version` decimal(10, 1) NOT NULL, + `release_date` date NOT NULL, + `label` varchar(64) NOT NULL, + `is_current` tinyint(1) NOT NULL DEFAULT '0', + `release_type` enum('integrated','partial') NOT NULL, + `site_id` int(11) NOT NULL, + `status` varchar(12) NOT NULL, + `name` varchar(3) DEFAULT NULL, + PRIMARY KEY (`release_id`), + UNIQUE KEY `ensembl_release_version_site_id_b743399a_uniq` (`version`,`site_id`), + KEY `ensembl_release_site_id_7c2f537a_fk_ensembl_site_site_id` (`site_id`), + CONSTRAINT `ensembl_release_site_id_7c2f537a_fk_ensembl_site_site_id` FOREIGN KEY (`site_id`) REFERENCES `ensembl_site` (`site_id`) +) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1; -CREATE TABLE ensembl_release +CREATE TABLE `ensembl_site` ( - release_id int auto_increment primary key, - version decimal(10, 1) not null, - release_date date null, - label varchar(64) null, - is_current tinyint(1) not null, - release_type varchar(16) not null, - site_id int null, - status varchar(12) not null, - name varchar(3) null, - constraint ensembl_release_version_site_id_b743399a_uniq - unique (version, site_id), - constraint ensembl_release_site_id_7c2f537a_fk_ensembl_site_site_id - foreign key (site_id) references ensembl_site (site_id) -); + `site_id` int(11) NOT NULL AUTO_INCREMENT, + `name` varchar(64) NOT NULL, + `label` varchar(64) NOT NULL, + `uri` varchar(64) NOT NULL, + PRIMARY KEY (`site_id`) +) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1; -CREATE TABLE organism 
-( - organism_id int auto_increment primary key, - taxonomy_id int not null, - species_taxonomy_id int null, - common_name varchar(128) not null, - strain varchar(128) null, - scientific_name varchar(128) null, - biosample_id varchar(128) not null, - scientific_parlance_name varchar(255) null, - organism_uuid char(36) not null, - strain_type varchar(128) null, - `rank` int default 0 null, - constraint ensembl_name - unique (biosample_id), - constraint organism_uuid - unique (organism_uuid) -); -CREATE TABLE genome +CREATE TABLE `genome` ( - genome_id int auto_increment - primary key, - genome_uuid char(36) not null, - created datetime(6) not null, - assembly_id int not null, - organism_id int not null, - is_best tinyint(1) default 0 not null, - production_name varchar(255) not null, - genebuild_version varchar(20) null, - genebuild_date varchar(20) null, - constraint genome_genome_uuid_6b62d0ad_uniq - unique (genome_uuid), - constraint genome_assembly_id_0a748388_fk_assembly_assembly_id - foreign key (assembly_id) references assembly (assembly_id) - on delete cascade, - constraint genome_organism_id_99ad7f35_fk_organism_organism_id - foreign key (organism_id) references organism (organism_id) - on delete cascade -); + `genome_id` int(11) NOT NULL AUTO_INCREMENT, + `genome_uuid` char(36) NOT NULL, + `created` datetime(6) NOT NULL, + `assembly_id` int(11) NOT NULL, + `organism_id` int(11) NOT NULL, + `production_name` varchar(120) NOT NULL, + `genebuild_version` varchar(64) NOT NULL, + `genebuild_date` varchar(20) NOT NULL, + `annotation_source` varchar(120) NOT NULL, + `suppressed` tinyint(1) NOT NULL DEFAULT '0', + `suppression_details` varchar(255) DEFAULT NULL, + `url_name` varchar(128) DEFAULT NULL, + PRIMARY KEY (`genome_id`), + UNIQUE KEY `genome_genome_uuid_6b62d0ad_uniq` (`genome_uuid`), + KEY `genome_assembly_id_0a748388_fk_assembly_assembly_id` (`assembly_id`), + KEY `genome_organism_id_99ad7f35_fk_organism_organism_id` (`organism_id`), + CONSTRAINT 
`genome_assembly_id_0a748388_fk_assembly_assembly_id` FOREIGN KEY (`assembly_id`) REFERENCES `assembly` (`assembly_id`) ON DELETE CASCADE, + CONSTRAINT `genome_organism_id_99ad7f35_fk_organism_organism_id` FOREIGN KEY (`organism_id`) REFERENCES `organism` (`organism_id`) ON DELETE CASCADE +) ENGINE=InnoDB AUTO_INCREMENT=205 DEFAULT CHARSET=latin1; - -CREATE TABLE genome_dataset +CREATE TABLE `genome_dataset` ( - genome_dataset_id int auto_increment primary key, - is_current tinyint(1) not null, - dataset_id int not null, - genome_id int not null, - release_id int null, - constraint uk_genome_dataset UNIQUE KEY (dataset_id, genome_id), - constraint genome_dataset_dataset_id_0e9b7c99_fk_dataset_dataset_id - foreign key (dataset_id) references dataset (dataset_id) - on delete cascade, - constraint genome_dataset_genome_id_21d55a50_fk_genome_genome_id - foreign key (genome_id) references genome (genome_id) - on delete cascade, - constraint genome_dataset_release_id_1903f87c_fk_ensembl_release_release_id - foreign key (release_id) references ensembl_release (release_id) - on delete set null -); + `genome_dataset_id` int(11) NOT NULL AUTO_INCREMENT, + `is_current` tinyint(1) NOT NULL, + `dataset_id` int(11) NOT NULL, + `genome_id` int(11) NOT NULL, + `release_id` int(11) DEFAULT NULL, + PRIMARY KEY (`genome_dataset_id`), + UNIQUE KEY `uk_genome_dataset` (`dataset_id`,`genome_id`), + KEY `genome_dataset_genome_id_21d55a50_fk_genome_genome_id` (`genome_id`), + KEY `genome_dataset_release_id_1903f87c_fk_ensembl_release_release_id` (`release_id`), + CONSTRAINT `genome_dataset_dataset_id_0e9b7c99_fk_dataset_dataset_id` FOREIGN KEY (`dataset_id`) REFERENCES `dataset` (`dataset_id`) ON DELETE CASCADE, + CONSTRAINT `genome_dataset_genome_id_21d55a50_fk_genome_genome_id` FOREIGN KEY (`genome_id`) REFERENCES `genome` (`genome_id`) ON DELETE CASCADE, + CONSTRAINT `genome_dataset_release_id_1903f87c_fk_ensembl_release_release_id` FOREIGN KEY (`release_id`) REFERENCES 
`ensembl_release` (`release_id`) ON DELETE SET NULL +) ENGINE=InnoDB AUTO_INCREMENT=9020 DEFAULT CHARSET=latin1; -CREATE TABLE genome_release +CREATE TABLE `genome_group` ( - genome_release_id int auto_increment primary key, - is_current tinyint(1) not null, - genome_id int not null, - release_id int not null, - constraint uk_genome_dataset UNIQUE KEY (release_id, genome_id), - constraint genome_release_genome_id_3e45dc04_fk_genome_genome_id - foreign key (genome_id) references genome (genome_id), - constraint genome_release_release_id_bca7e1e5_fk_ensembl_release_release_id - foreign key (release_id) references ensembl_release (release_id) -); + `genome_group_id` int(11) NOT NULL AUTO_INCREMENT, + `type` enum('compara_reference','structural_variant','project') NOT NULL, + `name` varchar(128) NOT NULL, + `label` varchar(128) DEFAULT NULL, + `searchable` tinyint(1) NOT NULL DEFAULT '0', + `description` varchar(255) DEFAULT NULL, + PRIMARY KEY (`genome_group_id`), + UNIQUE KEY `unique_type_name` (`type`,`name`), + KEY `idx_type` (`type`), + KEY `idx_searchable` (`searchable`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT; -CREATE TABLE organism_group +CREATE TABLE `genome_group_member` ( - organism_group_id int auto_increment primary key, - type varchar(32) null, - name varchar(255) not null, - code varchar(48) null, - constraint code - unique (code), - constraint organism_group_type_name_170b6dae_uniq - unique (type, name) -); + `genome_group_member_id` int(11) NOT NULL AUTO_INCREMENT, + `is_reference` tinyint(1) NOT NULL DEFAULT '0', + `genome_id` int(11) NOT NULL, + `genome_group_id` int(11) NOT NULL, + `release_id` int(11) DEFAULT NULL, + `is_current` tinyint(1) NOT NULL DEFAULT '0', + PRIMARY KEY (`genome_group_member_id`), + UNIQUE KEY `unique_genome_group` (`genome_id`,`genome_group_id`), + KEY `idx_genome_id` (`genome_id`), + KEY `idx_genome_group_id` (`genome_group_id`), + KEY `idx_release_id` (`release_id`), + KEY `idx_is_current` (`is_current`), 
+ CONSTRAINT `fk_ggm_genome` FOREIGN KEY (`genome_id`) REFERENCES `genome` (`genome_id`) ON DELETE CASCADE, + CONSTRAINT `fk_ggm_group` FOREIGN KEY (`genome_group_id`) REFERENCES `genome_group` (`genome_group_id`) ON DELETE CASCADE, + CONSTRAINT `fk_ggm_release` FOREIGN KEY (`release_id`) REFERENCES `ensembl_release` (`release_id`) ON DELETE SET NULL +) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT; -CREATE TABLE organism_group_member +CREATE TABLE `genome_release` ( - organism_group_member_id int auto_increment primary key, - is_reference tinyint(1) null, - organism_id int not null, - organism_group_id int not null, - `order` int null, - constraint organism_group_member_organism_id_organism_gro_fe8f49ac_uniq - unique (organism_id, organism_group_id), - constraint organism_group_membe_organism_group_id_533ca128_fk_organism_ - foreign key (organism_group_id) references organism_group (organism_group_id) - on delete cascade, - constraint organism_group_membe_organism_id_2808252e_fk_organism_ - foreign key (organism_id) references organism (organism_id) - on delete cascade -); + `genome_release_id` int(11) NOT NULL AUTO_INCREMENT, + `is_current` tinyint(1) NOT NULL, + `genome_id` int(11) NOT NULL, + `release_id` int(11) NOT NULL, + PRIMARY KEY (`genome_release_id`), + UNIQUE KEY `uk_genome_dataset` (`release_id`,`genome_id`), + KEY `genome_release_genome_id_3e45dc04_fk_genome_genome_id` (`genome_id`), + CONSTRAINT `genome_release_genome_id_3e45dc04_fk_genome_genome_id` FOREIGN KEY (`genome_id`) REFERENCES `genome` (`genome_id`), + CONSTRAINT `genome_release_release_id_bca7e1e5_fk_ensembl_release_release_id` FOREIGN KEY (`release_id`) REFERENCES `ensembl_release` (`release_id`) +) ENGINE=InnoDB AUTO_INCREMENT=31 DEFAULT CHARSET=latin1; CREATE TABLE `ncbi_taxa_name` ( `taxon_id` int(10) unsigned NOT NULL, @@ -280,4 +246,61 @@ CREATE TABLE `ncbi_taxa_node` ( KEY `rank` (`rank`), KEY `left_index` (`left_index`), KEY `right_index` (`right_index`) -) ENGINE=InnoDB 
DEFAULT CHARSET=latin1; \ No newline at end of file +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + +CREATE TABLE `organism` +( + `organism_id` int(11) NOT NULL AUTO_INCREMENT, + `taxonomy_id` int(11) NOT NULL, + `species_taxonomy_id` int(11) DEFAULT NULL, + `common_name` varchar(128) NOT NULL, + `strain` varchar(128) DEFAULT NULL, + `scientific_name` varchar(128) DEFAULT NULL, + `biosample_id` varchar(128) NOT NULL, + `scientific_parlance_name` varchar(255) DEFAULT NULL, + `organism_uuid` char(36) NOT NULL, + `strain_type` varchar(128) DEFAULT NULL, + `rank` int(11) DEFAULT '0', + PRIMARY KEY (`organism_id`), + UNIQUE KEY `ensembl_name` (`biosample_id`), + UNIQUE KEY `organism_uuid` (`organism_uuid`) +) ENGINE=InnoDB AUTO_INCREMENT=176 DEFAULT CHARSET=latin1; + +CREATE TABLE `organism_group` +( + `organism_group_id` int(11) NOT NULL AUTO_INCREMENT, + `type` varchar(32) DEFAULT NULL, + `name` varchar(255) NOT NULL, + `code` varchar(48) DEFAULT NULL, + PRIMARY KEY (`organism_group_id`), + UNIQUE KEY `code` (`code`), + UNIQUE KEY `organism_group_type_name_170b6dae_uniq` (`type`,`name`) +) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=latin1; + +CREATE TABLE `organism_group_member` +( + `organism_group_member_id` int(11) NOT NULL AUTO_INCREMENT, + `is_reference` tinyint(1) NOT NULL DEFAULT '0', + `organism_id` int(11) NOT NULL, + `organism_group_id` int(11) NOT NULL, + `order` int(11) DEFAULT NULL, + PRIMARY KEY (`organism_group_member_id`), + UNIQUE KEY `organism_group_member_organism_id_organism_gro_fe8f49ac_uniq` (`organism_id`,`organism_group_id`), + KEY `organism_group_membe_organism_group_id_533ca128_fk_organism_` (`organism_group_id`), + CONSTRAINT `organism_group_membe_organism_group_id_533ca128_fk_organism_` FOREIGN KEY (`organism_group_id`) REFERENCES `organism_group` (`organism_group_id`) ON DELETE CASCADE, + CONSTRAINT `organism_group_membe_organism_id_2808252e_fk_organism_` FOREIGN KEY (`organism_id`) REFERENCES `organism` (`organism_id`) ON DELETE CASCADE 
+) ENGINE=InnoDB AUTO_INCREMENT=235 DEFAULT CHARSET=latin1; + +CREATE TABLE `sequence_alias` +( + `sequence_alias_id` int(11) NOT NULL AUTO_INCREMENT, + `assembly_sequence_id` int(11) NOT NULL, + `alias` varchar(128) NOT NULL, + `source` varchar(128) DEFAULT NULL, + PRIMARY KEY (`sequence_alias_id`), + UNIQUE KEY `unique_sequence_alias` (`assembly_sequence_id`,`alias`), + KEY `idx_alias` (`alias`), + KEY `idx_assembly_sequence_id` (`assembly_sequence_id`), + CONSTRAINT `fk_sa_assembly_sequence` FOREIGN KEY (`assembly_sequence_id`) REFERENCES `assembly_sequence` (`assembly_sequence_id`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT; + From 52512a37a2fd1d5213b82f5bddb52f1bae39d162 Mon Sep 17 00:00:00 2001 From: danielp Date: Thu, 16 Oct 2025 09:33:12 +0100 Subject: [PATCH 06/14] Full conversion of tests to sqlite3 --- .gitlab-ci.yml | 9 +- .travis.yml | 8 +- src/tests/README.md | 64 - src/tests/conftest.py | 116 +- src/tests/databases/compara_db/genome_db.txt | 18 - src/tests/databases/compara_db/table.sql | 19 - src/tests/databases/core_1/attrib_type.txt | 3 - src/tests/databases/core_1/coord_system.txt | 1 - src/tests/databases/core_1/meta.txt | 25 - src/tests/databases/core_1/seq_region.txt | 3 - .../databases/core_1/seq_region_attrib.txt | 8 - .../databases/core_1/seq_region_synonym.txt | 3 - src/tests/databases/core_1/table.sql | 67 - src/tests/databases/core_2/attrib_type.txt | 2 - src/tests/databases/core_2/coord_system.txt | 1 - src/tests/databases/core_2/meta.txt | 25 - src/tests/databases/core_2/seq_region.txt | 3 - .../databases/core_2/seq_region_attrib.txt | 6 - .../databases/core_2/seq_region_synonym.txt | 3 - src/tests/databases/core_2/table.sql | 86 - src/tests/databases/core_3/attrib_type.txt | 2 - src/tests/databases/core_3/coord_system.txt | 1 - src/tests/databases/core_3/meta.txt | 23 - src/tests/databases/core_3/seq_region.txt | 3 - .../databases/core_3/seq_region_attrib.txt | 6 - 
.../databases/core_3/seq_region_synonym.txt | 3 - src/tests/databases/core_3/table.sql | 86 - src/tests/databases/core_4/attrib_type.txt | 2 - src/tests/databases/core_4/coord_system.txt | 1 - src/tests/databases/core_4/meta.txt | 23 - src/tests/databases/core_4/seq_region.txt | 3 - .../databases/core_4/seq_region_attrib.txt | 6 - .../databases/core_4/seq_region_synonym.txt | 3 - src/tests/databases/core_4/table.sql | 87 - src/tests/databases/core_5/attrib_type.txt | 2 - src/tests/databases/core_5/coord_system.txt | 1 - src/tests/databases/core_5/meta.txt | 20 - src/tests/databases/core_5/seq_region.txt | 3 - .../databases/core_5/seq_region_attrib.txt | 6 - .../databases/core_5/seq_region_synonym.txt | 3 - src/tests/databases/core_5/table.sql | 87 - src/tests/databases/core_6/attrib_type.txt | 2 - src/tests/databases/core_6/coord_system.txt | 1 - src/tests/databases/core_6/meta.txt | 25 - src/tests/databases/core_6/seq_region.txt | 3 - .../databases/core_6/seq_region_attrib.txt | 6 - .../databases/core_6/seq_region_synonym.txt | 3 - src/tests/databases/core_6/table.sql | 86 - src/tests/databases/core_7/attrib_type.txt | 3 - src/tests/databases/core_7/coord_system.txt | 1 - src/tests/databases/core_7/meta.txt | 26 - src/tests/databases/core_7/seq_region.txt | 3 - .../databases/core_7/seq_region_attrib.txt | 8 - .../databases/core_7/seq_region_synonym.txt | 3 - src/tests/databases/core_7/table.sql | 86 - src/tests/databases/core_8/attrib_type.txt | 3 - src/tests/databases/core_8/coord_system.txt | 1 - src/tests/databases/core_8/meta.txt | 25 - src/tests/databases/core_8/seq_region.txt | 3 - .../databases/core_8/seq_region_attrib.txt | 8 - .../databases/core_8/seq_region_synonym.txt | 3 - src/tests/databases/core_8/table.sql | 86 - src/tests/databases/dump_mysql_db.py | 183 -- .../ensembl_genome_metadata/assembly.txt | 19 - .../assembly_sequence.txt | 113 - .../ensembl_genome_metadata/attribute.txt | 104 - .../ensembl_genome_metadata/dataset.txt | 499 ---- 
.../dataset_attribute.txt | 2246 ----------------- .../dataset_source.txt | 53 - .../ensembl_genome_metadata/dataset_type.txt | 34 - .../ensembl_release.txt | 6 - .../ensembl_genome_metadata/ensembl_site.txt | 1 - .../ensembl_genome_metadata/genome.txt | 20 - .../genome_dataset.txt | 499 ---- .../genome_release.txt | 30 - .../ncbi_taxa_name.txt | 469 ---- .../ncbi_taxa_node.txt | 64 - .../ensembl_genome_metadata/organism.txt | 11 - .../organism_group.txt | 8 - .../organism_group_member.txt | 17 - .../ensembl_genome_metadata/table.sql | 306 --- src/tests/databases/load_mysql_db.py | 227 -- src/tests/databases/mysql2sqlite.py | 244 -- .../ncbi_taxonomy/ncbi_taxa_name.txt | 472 ---- .../ncbi_taxonomy/ncbi_taxa_node.txt | 64 - src/tests/databases/ncbi_taxonomy/table.sql | 24 - 86 files changed, 41 insertions(+), 6898 deletions(-) delete mode 100644 src/tests/README.md delete mode 100644 src/tests/databases/compara_db/genome_db.txt delete mode 100644 src/tests/databases/compara_db/table.sql delete mode 100644 src/tests/databases/core_1/attrib_type.txt delete mode 100644 src/tests/databases/core_1/coord_system.txt delete mode 100644 src/tests/databases/core_1/meta.txt delete mode 100644 src/tests/databases/core_1/seq_region.txt delete mode 100644 src/tests/databases/core_1/seq_region_attrib.txt delete mode 100644 src/tests/databases/core_1/seq_region_synonym.txt delete mode 100644 src/tests/databases/core_1/table.sql delete mode 100644 src/tests/databases/core_2/attrib_type.txt delete mode 100644 src/tests/databases/core_2/coord_system.txt delete mode 100644 src/tests/databases/core_2/meta.txt delete mode 100644 src/tests/databases/core_2/seq_region.txt delete mode 100644 src/tests/databases/core_2/seq_region_attrib.txt delete mode 100644 src/tests/databases/core_2/seq_region_synonym.txt delete mode 100644 src/tests/databases/core_2/table.sql delete mode 100644 src/tests/databases/core_3/attrib_type.txt delete mode 100644 src/tests/databases/core_3/coord_system.txt 
delete mode 100644 src/tests/databases/core_3/meta.txt delete mode 100644 src/tests/databases/core_3/seq_region.txt delete mode 100644 src/tests/databases/core_3/seq_region_attrib.txt delete mode 100644 src/tests/databases/core_3/seq_region_synonym.txt delete mode 100644 src/tests/databases/core_3/table.sql delete mode 100644 src/tests/databases/core_4/attrib_type.txt delete mode 100644 src/tests/databases/core_4/coord_system.txt delete mode 100644 src/tests/databases/core_4/meta.txt delete mode 100644 src/tests/databases/core_4/seq_region.txt delete mode 100644 src/tests/databases/core_4/seq_region_attrib.txt delete mode 100644 src/tests/databases/core_4/seq_region_synonym.txt delete mode 100644 src/tests/databases/core_4/table.sql delete mode 100644 src/tests/databases/core_5/attrib_type.txt delete mode 100644 src/tests/databases/core_5/coord_system.txt delete mode 100644 src/tests/databases/core_5/meta.txt delete mode 100644 src/tests/databases/core_5/seq_region.txt delete mode 100644 src/tests/databases/core_5/seq_region_attrib.txt delete mode 100644 src/tests/databases/core_5/seq_region_synonym.txt delete mode 100644 src/tests/databases/core_5/table.sql delete mode 100644 src/tests/databases/core_6/attrib_type.txt delete mode 100644 src/tests/databases/core_6/coord_system.txt delete mode 100644 src/tests/databases/core_6/meta.txt delete mode 100644 src/tests/databases/core_6/seq_region.txt delete mode 100644 src/tests/databases/core_6/seq_region_attrib.txt delete mode 100644 src/tests/databases/core_6/seq_region_synonym.txt delete mode 100644 src/tests/databases/core_6/table.sql delete mode 100644 src/tests/databases/core_7/attrib_type.txt delete mode 100644 src/tests/databases/core_7/coord_system.txt delete mode 100644 src/tests/databases/core_7/meta.txt delete mode 100644 src/tests/databases/core_7/seq_region.txt delete mode 100644 src/tests/databases/core_7/seq_region_attrib.txt delete mode 100644 src/tests/databases/core_7/seq_region_synonym.txt delete 
mode 100644 src/tests/databases/core_7/table.sql delete mode 100644 src/tests/databases/core_8/attrib_type.txt delete mode 100644 src/tests/databases/core_8/coord_system.txt delete mode 100644 src/tests/databases/core_8/meta.txt delete mode 100644 src/tests/databases/core_8/seq_region.txt delete mode 100644 src/tests/databases/core_8/seq_region_attrib.txt delete mode 100644 src/tests/databases/core_8/seq_region_synonym.txt delete mode 100644 src/tests/databases/core_8/table.sql delete mode 100644 src/tests/databases/dump_mysql_db.py delete mode 100644 src/tests/databases/ensembl_genome_metadata/assembly.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/assembly_sequence.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/attribute.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/dataset.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/dataset_attribute.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/dataset_source.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/dataset_type.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/ensembl_release.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/ensembl_site.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/genome.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/genome_dataset.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/genome_release.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/ncbi_taxa_name.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/ncbi_taxa_node.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/organism.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/organism_group.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/organism_group_member.txt delete mode 100644 src/tests/databases/ensembl_genome_metadata/table.sql delete mode 
100644 src/tests/databases/load_mysql_db.py delete mode 100644 src/tests/databases/mysql2sqlite.py delete mode 100644 src/tests/databases/ncbi_taxonomy/ncbi_taxa_name.txt delete mode 100644 src/tests/databases/ncbi_taxonomy/ncbi_taxa_node.txt delete mode 100644 src/tests/databases/ncbi_taxonomy/table.sql diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4d59171d..a703b4ee 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,18 +1,11 @@ # .gitlab-ci.yml image: python:3.11 -variables: - MYSQL_ROOT_PASSWORD: "" - MYSQL_ALLOW_EMPTY_PASSWORD: "yes" - -services: - - mysql:8.0 stages: - test before_script: - - mysql -h mysql -u root -e "SET GLOBAL local_infile=1;" - python -m pip install --upgrade pip - pip install .[test] @@ -24,7 +17,7 @@ test: image: python:${PYTHON_VERSION} script: - echo "DB_HOST $METADATA_URI $TAXONOMY_URI" - - coverage run -m pytest -c pyproject.toml --server mysql://root@mysql:3306 + - coverage run -m pytest -c pyproject.toml coverage: '/TOTAL.*\s+(\d+%)$/' artifacts: reports: diff --git a/.travis.yml b/.travis.yml index e9b54ac1..4a13baad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,14 +4,8 @@ dist: focal python: - '3.10' - '3.11' -services: - - mysql before_script: - # In MySQL 8, local_infile is disabled by default for security reasons. - # By adding SET GLOBAL local_infile=1;, we enable this feature at runtime. - - mysql -e "SET GLOBAL local_infile=1;" - pip install . 
- pip install .[test] script: - - echo "DB_HOST $METADATA_URI $TAXONOMY_URI" - - coverage run -m pytest -c pyproject.toml --server mysql://travis@127.0.0.1:3306 \ No newline at end of file + - coverage run -m pytest -c pyproject.toml \ No newline at end of file diff --git a/src/tests/README.md b/src/tests/README.md deleted file mode 100644 index e67add79..00000000 --- a/src/tests/README.md +++ /dev/null @@ -1,64 +0,0 @@ -Ensembl Genome Metadata Test Dataset -==================================== - -5 Releases spanning all status ------------------------------- - -| release\_id | version | release\_date | label | is\_current | release\_type | site\_id | status | -| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | -| 1 | 110.1 | 2023-10-18 | MVP Beta-1 | 1 | partial | 1 | Released | -| 2 | 110.2 | null | MVP Beta-2 | 0 | partial | 1 | Prepared | -| 3 | 110.3 | null | MVP Beta-3 | 0 | partial | 1 | Preparing | -| 4 | 112.0 | null | MVP Rel-1 | 0 | integrated | 1 | Planned | -| 5 | 108.0 | 2023-06-15 | First Beta | 0 | partial | 1 | Released | - - -First Beta - Released ---------------------- - -7 initial species present on the first beta public release (mid-2023) - `Released` - -Datasets: -- Datasets all `Released` -- A supplementary compara_homologies `Processed` - Attached to `Beta-2` (see below) - -Beta-1 - Released ------------------ - -3 more humans `Released` - -Datasets: -- Datasets all `Released` (assembly - genebuild - evidence - variation) -- Some with regulatory_features -- A supplementary compara_homologies `Processed` - Attached to `Beta-2` (see below) - -Beta-2 - Prepared ------------------ - -4 more human genomes attached to release - -Datasets: -- All datasets `Processed` (assembly - genebuild - variation - compara_homologies) -- No regulatory_features - -Beta-3 - Preparing ------------------- - -2 more humans attached to release - -Datasets - - - Assembly - Processed - - Genebuild - One Processed / One Processing - - Homologies - Submitted - 
-Beta-4 - Planed ---------------- - -3 more humans - not attached to any released - -Datasets - -- Assembly - Processed -- Genebuild - Submitted -- Homologies - Submitted \ No newline at end of file diff --git a/src/tests/conftest.py b/src/tests/conftest.py index fa90afcd..492f78f5 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -17,39 +17,18 @@ from ensembl.production.metadata.grpc import ensembl_metadata_pb2 -def pytest_addoption(parser): - """Add custom command line options.""" - parser.addoption( - "--use-sqlite-dbs", - action="store_true", - default=False, - help="Use pre-converted SQLite .db files instead of creating temporary databases", - ) - - def pytest_configure(config: Config) -> None: pytest.dbs_dir = Path(__file__).parent / "databases" -@pytest.fixture(scope="session") -def use_sqlite_dbs(request): - """Check if we should use SQLite .db files.""" - return request.config.getoption("--use-sqlite-dbs") - - @pytest.fixture(scope="module") -def test_dbs(request, use_sqlite_dbs): +def test_dbs(request): """ - Test database fixture - supports both SQLite and MySQL. + Test database fixture using SQLite databases. - With --use-sqlite-dbs: Uses pre-converted .db files (creates temporary copies for isolation) - Without flag: Creates temporary MySQL databases using UnitTestDB - - Note: SQLite databases are copied to temp directories to ensure test isolation. - Changes made during tests won't affect the original .db files. + Uses pre-converted .db files and creates temporary copies for test isolation. + Changes made during tests won't affect the original .db files. 
""" - from ensembl.utils.database import UnitTestDB - db_configs = request.param if hasattr(request, "param") else [] test_databases = {} temp_resources = [] # Track resources for cleanup @@ -58,67 +37,50 @@ def test_dbs(request, use_sqlite_dbs): src_path = db_config["src"] db_name = src_path.name - if use_sqlite_dbs: - # Use pre-converted SQLite .db files with temporary copies for isolation - sqlite_file = src_path.parent / f"{db_name}.db" - - if not sqlite_file.exists(): - raise FileNotFoundError( - f"SQLite database not found: {sqlite_file}\n" - f"Please convert it first using your conversion script." - ) + # Use pre-converted SQLite .db files with temporary copies for isolation + sqlite_file = src_path.parent / f"{db_name}.db" - # Create temporary copy to ensure test isolation - temp_dir = tempfile.mkdtemp(prefix=f"pytest_{db_name}_") - temp_db_file = Path(temp_dir) / f"{db_name}_test.db" + if not sqlite_file.exists(): + raise FileNotFoundError( + f"SQLite database not found: {sqlite_file}\n" + f"Please convert it first using your conversion script." 
+ ) - print(f"\n>>> Using SQLite database: {sqlite_file}") - print(f" (temporary copy: {temp_db_file})") + # Create temporary copy to ensure test isolation + temp_dir = tempfile.mkdtemp(prefix=f"pytest_{db_name}_") + temp_db_file = Path(temp_dir) / f"{db_name}_test.db" - shutil.copy2(sqlite_file, temp_db_file) + print(f"\n>>> Using SQLite database: {sqlite_file}") + print(f" (temporary copy: {temp_db_file})") - # Create connection to temporary copy - db_url = f"sqlite:///{temp_db_file}" - test_databases[db_name] = type("TestDB", (object,), { - "dbc": DBConnection(db_url), - "drop": lambda: None, # Add no-op drop method for consistency - })() + shutil.copy2(sqlite_file, temp_db_file) - temp_resources.append((temp_db_file, temp_dir)) + # Create connection to temporary copy + db_url = f"sqlite:///{temp_db_file}" + test_databases[db_name] = type("TestDB", (object,), { + "dbc": DBConnection(db_url), + "drop": lambda: None, # No-op drop method for consistency + })() - else: - # Use MySQL with UnitTestDB (creates temporary databases from dumps) - server_url = request.config.getoption( - "--server", "mysql://ensembl@localhost:3306/?local_infile=1" - ) - print(f"\n>>> Creating temporary MySQL database from: {src_path}") - test_db = UnitTestDB(server_url, dump_dir=src_path, name=db_name) - test_databases[db_name] = test_db + temp_resources.append((temp_db_file, temp_dir)) yield test_databases - # Cleanup - if use_sqlite_dbs: - # Close SQLite connections and remove temporary files - for db_name, test_db in test_databases.items(): - if hasattr(test_db.dbc, 'dispose'): - test_db.dbc.dispose() - - # Remove temporary files and directories - for temp_file, temp_dir in temp_resources: - try: - if temp_file.exists(): - temp_file.unlink() - if Path(temp_dir).exists(): - shutil.rmtree(temp_dir) - print(f">>> Cleaned up temporary SQLite copy: {temp_dir}") - except Exception as e: - print(f"Warning: Failed to cleanup {temp_dir}: {e}") - else: - # Drop temporary MySQL databases - for 
db_name, test_db in test_databases.items(): - if hasattr(test_db, "drop"): - test_db.drop() + # Cleanup - close SQLite connections and remove temporary files + for db_name, test_db in test_databases.items(): + if hasattr(test_db.dbc, 'dispose'): + test_db.dbc.dispose() + + # Remove temporary files and directories + for temp_file, temp_dir in temp_resources: + try: + if temp_file.exists(): + temp_file.unlink() + if Path(temp_dir).exists(): + shutil.rmtree(temp_dir) + print(f">>> Cleaned up temporary SQLite copy: {temp_dir}") + except Exception as e: + print(f"Warning: Failed to cleanup {temp_dir}: {e}") @pytest.fixture(scope="module", autouse=True) diff --git a/src/tests/databases/compara_db/genome_db.txt b/src/tests/databases/compara_db/genome_db.txt deleted file mode 100644 index f4012ca9..00000000 --- a/src/tests/databases/compara_db/genome_db.txt +++ /dev/null @@ -1,18 +0,0 @@ -47 6239 caenorhabditis_elegans WBcel235 2014-10 0 0 \N strain N2 Caenorhabditis elegans (Nematode, N2) \N 110 \N -30 511145 escherichia_coli_str_k_12_substr_mg1655 ASM584v2 2018-09 0 0 \N strain K-12 substr. MG1655 Escherichia coli str. K-12 substr. MG1655 str. 
K12 (GCA_000005845) \N 110 \N -12 9606 homo_sapiens GRCh38 2023-03 0 0 \N \N Human \N 110 \N -11 9606 homo_sapiens_37 GRCh37 2013-09 0 0 \N \N Human \N 110 \N -23 9606 homo_sapiens_gca018469415v1 HG03516.alt.pat.f1_v2 2022-07 0 0 \N population Esan in Nigeria Homo sapiens (Human) - GCA_018469415.1 \N 110 \N -24 9606 homo_sapiens_gca018469425v1 HG03516.pri.mat.f1_v2 2022-07 0 0 \N population Esan in Nigeria Homo sapiens (Human) - GCA_018469425.1 \N 110 \N -31 9606 homo_sapiens_gca018469875v1 HG02622.pri.mat.f1_v2 2022-07 0 0 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018469875.1 \N 110 \N -32 9606 homo_sapiens_gca018469925v1 HG02622.alt.pat.f1_v2 2022-07 0 0 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018469925.1 \N 110 \N -19 9606 homo_sapiens_gca018473295v1 HG03540.pri.mat.f1_v2 2022-08 0 0 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018473295.1 \N 110 \N -21 9606 homo_sapiens_gca018473315v1 HG03540.alt.pat.f1_v2 2022-08 0 0 \N population Gambian in Western Division Homo sapiens (Human) - GCA_018473315.1 \N 110 \N -42 9606 homo_sapiens_gca018505825v1 HG02109.pri.mat.f1_v2 2022-07 0 0 \N population African from Barbados Homo sapiens (Human) - GCA_018505825.1 \N 110 \N -46 9606 homo_sapiens_gca018505865v1 HG02109.alt.pat.f1_v2 2022-07 0 0 \N population African from Barbados Homo sapiens (Human) - GCA_018505865.1 \N 110 \N -6 9606 homo_sapiens_gca018852605v1 HG002.alt.pat.f1_v2 2022-07 0 0 \N population European Homo sapiens (Human) - GCA_018852605.1 \N 110 \N -7 9606 homo_sapiens_gca018852615v1 HG002.pri.mat.f1_v2 2022-07 0 0 \N population European Homo sapiens (Human) - GCA_018852615.1 \N 110 \N -8 9606 homo_sapiens_gca021950905v1 HG002.pat.cur.20211005 2022-07 0 0 \N population European Homo sapiens (Human) - GCA_021950905.1 \N 110 \N -9 9606 homo_sapiens_gca021951015v1 HG002.mat.cur.20211005 2022-07 0 0 \N population European Homo sapiens (Human) - GCA_021951015.1 \N 110 \N -16 559292 
saccharomyces_cerevisiae R64-1-1 2018-10 0 0 \N strain S288C Saccharomyces cerevisiae \N 110 \N -20 4565 triticum_aestivum IWGSC 2018-04-IWGSC 0 0 \N cultivar Chinese Spring Triticum aestivum \N 110 \N \ No newline at end of file diff --git a/src/tests/databases/compara_db/table.sql b/src/tests/databases/compara_db/table.sql deleted file mode 100644 index eea37ffe..00000000 --- a/src/tests/databases/compara_db/table.sql +++ /dev/null @@ -1,19 +0,0 @@ -CREATE TABLE genome_db ( - genome_db_id INT unsigned NOT NULL AUTO_INCREMENT, # unique internal id - taxon_id INT unsigned DEFAULT NULL, # KF taxon.taxon_id - name varchar(128) DEFAULT '' NOT NULL, - assembly varchar(100) DEFAULT '' NOT NULL, - genebuild varchar(255) DEFAULT '' NOT NULL, - has_karyotype tinyint(1) NOT NULL DEFAULT 0, - is_good_for_alignment TINYINT(1) NOT NULL DEFAULT 0, - genome_component varchar(5) DEFAULT NULL, - strain_name varchar(100) DEFAULT NULL, - display_name varchar(255) DEFAULT NULL, - locator varchar(400), - first_release smallint, - last_release smallint, - - PRIMARY KEY (genome_db_id), - UNIQUE KEY name (name,assembly,genome_component) - -) COLLATE=latin1_swedish_ci ENGINE=MyISAM; \ No newline at end of file diff --git a/src/tests/databases/core_1/attrib_type.txt b/src/tests/databases/core_1/attrib_type.txt deleted file mode 100644 index c74a0bed..00000000 --- a/src/tests/databases/core_1/attrib_type.txt +++ /dev/null @@ -1,3 +0,0 @@ -6 toplevel Top Level Top Level Non-Redundant Sequence Region -316 circular_seq Circular sequence Circular chromosome or plasmid molecule -547 sequence_location sequence_location "To identify sequence locations / cellular compartments that DNA sequence comes from.Values are supposed to be SO compliant (children of the plastid_sequence SO:0000740 and nuclear_sequence SO:0000738 ): ""apicoplast_chromosome"", ""chloroplast_chromosome"", ""chromoplast_chromosome"", ""cyanelle_chromosome"", ""leucoplast_chromosome"", ""macronuclear_chromosome"", 
""micronuclear_chromosome"", ""mitochondrial_chromosome"", ""nuclear_chromosome""." diff --git a/src/tests/databases/core_1/coord_system.txt b/src/tests/databases/core_1/coord_system.txt deleted file mode 100644 index 1a9e30b1..00000000 --- a/src/tests/databases/core_1/coord_system.txt +++ /dev/null @@ -1 +0,0 @@ -1 1 primary_assembly test 1 {'default_version', 'sequence_level'} diff --git a/src/tests/databases/core_1/meta.txt b/src/tests/databases/core_1/meta.txt deleted file mode 100644 index 4f4b4d06..00000000 --- a/src/tests/databases/core_1/meta.txt +++ /dev/null @@ -1,25 +0,0 @@ -26 \N schema_version 110 -12 1 assembly.accession GCF_1111111123.3 -25 1 assembly.alt_accession GCA_0000012345.3 -14 1 assembly.default jaber01 -13 1 assembly.name jaber01 -11 1 assembly.ucsc_alias SCARY -15 1 gencode.version 999 -28 1 genebuild.annotation_source ensembl -27 1 genebuild.last_geneset_update 2023-01 -23 1 genebuild.provider_name test -18 1 genebuild.sample_gene ENSAMXG00005000318 -19 1 genebuild.sample_location KB871578.1:9766653-9817473 -24 1 genebuild.start_date 2023-07-Ensembl -17 1 genebuild.version ENS01 -3 1 organism.common_name jabberwocky -7 1 organism.division Ensembl_TEST -6 1 organism.production_name Jabberwocky -4 1 organism.scientific_name carol_jabberwocky -1 1 organism.species_taxonomy_id 10029 -8 1 organism.strain reference -9 1 organism.strain_group testing -2 1 organism.taxonomy_id 10029 -10 1 organism.type monsters -5 1 organism.url Jabbe -20 1 strain.type test diff --git a/src/tests/databases/core_1/seq_region.txt b/src/tests/databases/core_1/seq_region.txt deleted file mode 100644 index e67ee5a7..00000000 --- a/src/tests/databases/core_1/seq_region.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 TEST1_seqA 1 666666 -2 TEST2_seqB 1 666 -3 TEST3_seqC 1 1666666 diff --git a/src/tests/databases/core_1/seq_region_attrib.txt b/src/tests/databases/core_1/seq_region_attrib.txt deleted file mode 100644 index baac3467..00000000 --- 
a/src/tests/databases/core_1/seq_region_attrib.txt +++ /dev/null @@ -1,8 +0,0 @@ -1 6 1 -2 6 1 -3 6 1 -1 547 nuclear_chromosome -2 547 nuclear_chromosome -3 547 mitochondrial_chromosome -1 316 1 -2 316 0 diff --git a/src/tests/databases/core_1/seq_region_synonym.txt b/src/tests/databases/core_1/seq_region_synonym.txt deleted file mode 100644 index de43d915..00000000 --- a/src/tests/databases/core_1/seq_region_synonym.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 1 TEST1_seq 50710 -2 2 TEST2_seq 50710 -3 3 TEST3_seq 50710 diff --git a/src/tests/databases/core_1/table.sql b/src/tests/databases/core_1/table.sql deleted file mode 100644 index 52941413..00000000 --- a/src/tests/databases/core_1/table.sql +++ /dev/null @@ -1,67 +0,0 @@ -CREATE TABLE `attrib_type` ( - `attrib_type_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `code` varchar(20) NOT NULL DEFAULT '', - `name` varchar(255) NOT NULL DEFAULT '', - `description` text, - PRIMARY KEY (`attrib_type_id`), - UNIQUE KEY `code_idx` (`code`) -) ENGINE=InnoDB AUTO_INCREMENT=548 DEFAULT CHARSET=latin1; - -CREATE TABLE `coord_system` -( - `coord_system_id` int(10) unsigned NOT NULL AUTO_INCREMENT, - `species_id` int(10) unsigned NOT NULL DEFAULT '1', - `name` varchar(40) NOT NULL, - `version` varchar(255) DEFAULT NULL, - `rank` int(11) NOT NULL, - `attrib` set('default_version','sequence_level') DEFAULT NULL, - PRIMARY KEY (`coord_system_id`), - UNIQUE KEY `rank_idx` (`rank`,`species_id`), - UNIQUE KEY `name_idx` (`name`,`version`,`species_id`), - KEY `species_idx` (`species_id`) -) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1; - -CREATE TABLE `meta` -( - `meta_id` int(11) NOT NULL AUTO_INCREMENT, - `species_id` int(10) unsigned DEFAULT '1', - `meta_key` varchar(40) NOT NULL, - `meta_value` varchar(255) NOT NULL, - PRIMARY KEY (`meta_id`), - UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), - KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=InnoDB AUTO_INCREMENT=29 DEFAULT 
CHARSET=latin1; - -CREATE TABLE `seq_region` -( - `seq_region_id` int(10) unsigned NOT NULL AUTO_INCREMENT, - `name` varchar(255) NOT NULL, - `coord_system_id` int(10) unsigned NOT NULL, - `length` int(10) unsigned NOT NULL, - PRIMARY KEY (`seq_region_id`), - UNIQUE KEY `name_cs_idx` (`name`,`coord_system_id`), - KEY `cs_idx` (`coord_system_id`) -) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1; - -CREATE TABLE `seq_region_attrib` -( - `seq_region_id` int(10) unsigned NOT NULL DEFAULT '0', - `attrib_type_id` smallint(5) unsigned NOT NULL DEFAULT '0', - `value` text NOT NULL, - UNIQUE KEY `region_attribx` (`seq_region_id`,`attrib_type_id`,`value`(500)), - KEY `seq_region_idx` (`seq_region_id`), - KEY `type_val_idx` (`attrib_type_id`,`value`(40)), - KEY `val_only_idx` (`value`(40)) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; - -CREATE TABLE `seq_region_synonym` -( - `seq_region_synonym_id` int(10) unsigned NOT NULL AUTO_INCREMENT, - `seq_region_id` int(10) unsigned NOT NULL, - `synonym` varchar(250) NOT NULL, - `external_db_id` int(10) unsigned DEFAULT NULL, - PRIMARY KEY (`seq_region_synonym_id`), - UNIQUE KEY `syn_idx` (`synonym`,`seq_region_id`), - KEY `seq_region_idx` (`seq_region_id`) -) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1; - diff --git a/src/tests/databases/core_2/attrib_type.txt b/src/tests/databases/core_2/attrib_type.txt deleted file mode 100644 index de5f1880..00000000 --- a/src/tests/databases/core_2/attrib_type.txt +++ /dev/null @@ -1,2 +0,0 @@ -6 toplevel Top Level Top Level Non-Redundant Sequence Region -547 sequence_location sequence_location To identify sequence locations / cellular compartments that DNA sequence comes from.Values are supposed to be SO compliant (children of the plastid_sequence SO:0000740 and nuclear_sequence SO:0000738 ): "apicoplast_chromosome", "chloroplast_chromosome", "chromoplast_chromosome", "cyanelle_chromosome", "leucoplast_chromosome", "macronuclear_chromosome", "micronuclear_chromosome", 
"mitochondrial_chromosome", "nuclear_chromosome". diff --git a/src/tests/databases/core_2/coord_system.txt b/src/tests/databases/core_2/coord_system.txt deleted file mode 100644 index 51314bf1..00000000 --- a/src/tests/databases/core_2/coord_system.txt +++ /dev/null @@ -1 +0,0 @@ -1 1 primary_assembly test 1 default_version,sequence_level diff --git a/src/tests/databases/core_2/meta.txt b/src/tests/databases/core_2/meta.txt deleted file mode 100644 index 29b7d700..00000000 --- a/src/tests/databases/core_2/meta.txt +++ /dev/null @@ -1,25 +0,0 @@ -12 1 assembly.accession weird01 -14 1 assembly.default jaber01 -13 1 assembly.name jaber01 -11 1 assembly.ucsc_alias SCARY -15 1 gencode.version 999 -16 1 genebuild.last_geneset_update 01 -3 1 organism.common_name jabberwocky -7 1 organism.division Ensembl_TEST -6 1 organism.production_name Jabberwocky -4 1 organism.scientific_name carol_jabberwocky -1 1 organism.species_taxonomy_id 6666666 -8 1 organism.strain reference -9 1 organism.strain_group testing -2 1 organism.taxonomy_id 666668 -10 1 organism.type monsters -5 1 organism.url Jabbe -17 1 genebuild.version ENS01 -18 1 genebuild.sample_gene ENSAMXG00005000318 -19 1 genebuild.sample_location KB871578.1:9766653-9817473 -20 1 strain.type test -21 1 genome.genome_uuid test -23 1 genebuild.provider_name test2 -24 1 genebuild.start_date 2023-07-Ensembl -25 \N schema_version 110 -26 1 genebuild.last_geneset_update 2023-01 diff --git a/src/tests/databases/core_2/seq_region.txt b/src/tests/databases/core_2/seq_region.txt deleted file mode 100644 index a2216feb..00000000 --- a/src/tests/databases/core_2/seq_region.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 TEST1_seq 1 666666 -2 TEST2_seq 1 666 -3 TEST3_seq 1 1666666 diff --git a/src/tests/databases/core_2/seq_region_attrib.txt b/src/tests/databases/core_2/seq_region_attrib.txt deleted file mode 100644 index d8dcda33..00000000 --- a/src/tests/databases/core_2/seq_region_attrib.txt +++ /dev/null @@ -1,6 +0,0 @@ -1 6 1 -2 6 1 -3 6 1 -1 
547 nuclear_chromosome -2 547 nuclear_chromosome -3 547 mitochondrial_chromosome diff --git a/src/tests/databases/core_2/seq_region_synonym.txt b/src/tests/databases/core_2/seq_region_synonym.txt deleted file mode 100644 index de43d915..00000000 --- a/src/tests/databases/core_2/seq_region_synonym.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 1 TEST1_seq 50710 -2 2 TEST2_seq 50710 -3 3 TEST3_seq 50710 diff --git a/src/tests/databases/core_2/table.sql b/src/tests/databases/core_2/table.sql deleted file mode 100644 index 953da984..00000000 --- a/src/tests/databases/core_2/table.sql +++ /dev/null @@ -1,86 +0,0 @@ -CREATE TABLE coord_system -( - coord_system_id int unsigned auto_increment - primary key, - species_id int unsigned default 1 not null, - name varchar(40) not null, - version varchar(255) null, - `rank` int not null, - attrib set ('default_version', 'sequence_level') null, - constraint name_idx - unique (name, version, species_id), - constraint rank_idx - unique (`rank`, species_id) -); - -CREATE INDEX species_idx - on coord_system (species_id); - -CREATE TABLE meta -( - meta_id int auto_increment - primary key, - species_id int unsigned default 1 null, - meta_key varchar(40) not null, - meta_value varchar(255) not null, - constraint species_key_value_idx - unique (species_id, meta_key, meta_value) -); - -CREATE INDEX species_value_idx - on meta (species_id, meta_value); - -CREATE TABLE seq_region -( - seq_region_id int unsigned auto_increment - primary key, - name varchar(255) not null, - coord_system_id int unsigned not null, - length int unsigned not null, - constraint name_cs_idx - unique (name, coord_system_id) -); - -CREATE INDEX cs_idx - on seq_region (coord_system_id); - -CREATE TABLE seq_region_attrib -( - seq_region_id int unsigned default 0 not null, - attrib_type_id smallint unsigned default 0 not null, - value text not null, - constraint region_attribx - unique (seq_region_id, attrib_type_id, value(500)) -); - -CREATE INDEX seq_region_idx - on 
seq_region_attrib (seq_region_id); - -CREATE INDEX type_val_idx - on seq_region_attrib (attrib_type_id, value(40)); - -CREATE INDEX val_only_idx - on seq_region_attrib (value(40)); - -CREATE TABLE seq_region_synonym -( - seq_region_synonym_id int unsigned auto_increment - primary key, - seq_region_id int unsigned not null, - synonym varchar(250) not null, - external_db_id int unsigned null, - constraint syn_idx - unique (synonym, seq_region_id) -); - -CREATE INDEX seq_region_idx - on seq_region_synonym (seq_region_id); - -CREATE TABLE `attrib_type` ( - `attrib_type_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `code` varchar(20) NOT NULL DEFAULT '', - `name` varchar(255) NOT NULL DEFAULT '', - `description` text, - PRIMARY KEY (`attrib_type_id`), - UNIQUE KEY `code_idx` (`code`) -); \ No newline at end of file diff --git a/src/tests/databases/core_3/attrib_type.txt b/src/tests/databases/core_3/attrib_type.txt deleted file mode 100644 index de5f1880..00000000 --- a/src/tests/databases/core_3/attrib_type.txt +++ /dev/null @@ -1,2 +0,0 @@ -6 toplevel Top Level Top Level Non-Redundant Sequence Region -547 sequence_location sequence_location To identify sequence locations / cellular compartments that DNA sequence comes from.Values are supposed to be SO compliant (children of the plastid_sequence SO:0000740 and nuclear_sequence SO:0000738 ): "apicoplast_chromosome", "chloroplast_chromosome", "chromoplast_chromosome", "cyanelle_chromosome", "leucoplast_chromosome", "macronuclear_chromosome", "micronuclear_chromosome", "mitochondrial_chromosome", "nuclear_chromosome". 
diff --git a/src/tests/databases/core_3/coord_system.txt b/src/tests/databases/core_3/coord_system.txt deleted file mode 100644 index 51314bf1..00000000 --- a/src/tests/databases/core_3/coord_system.txt +++ /dev/null @@ -1 +0,0 @@ -1 1 primary_assembly test 1 default_version,sequence_level diff --git a/src/tests/databases/core_3/meta.txt b/src/tests/databases/core_3/meta.txt deleted file mode 100644 index 28ddd53d..00000000 --- a/src/tests/databases/core_3/meta.txt +++ /dev/null @@ -1,23 +0,0 @@ -12 1 assembly.accession weird02 -13 1 assembly.name jaber02 -11 1 assembly.ucsc_alias SCARYIER -14 1 gencode.version 999 -15 1 genebuild.last_geneset_update 2024-02 -3 1 organism.common_name jabberwocky -7 1 organism.division Ensembl_TEST -6 1 organism.production_name Jabberwocky -4 1 organism.scientific_name carol_jabberwocky -1 1 organism.species_taxonomy_id 6666666 -8 1 organism.strain reference -9 1 organism.strain_group testing -2 1 organism.taxonomy_id 666668 -10 1 organism.type monsters -5 1 organism.url Jabbe -17 1 genebuild.version ENS01 -18 1 genebuild.sample_gene ENSAMXG00005000318 -19 1 genebuild.sample_location KB871578.1:9766653-9817473 -20 1 strain.type test -23 1 genebuild.provider_name test -24 1 genebuild.start_date 2023-07-Ensembl -25 \N schema_version 110 -26 1 genebuild.last_geneset_update 2023-01 diff --git a/src/tests/databases/core_3/seq_region.txt b/src/tests/databases/core_3/seq_region.txt deleted file mode 100644 index a2216feb..00000000 --- a/src/tests/databases/core_3/seq_region.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 TEST1_seq 1 666666 -2 TEST2_seq 1 666 -3 TEST3_seq 1 1666666 diff --git a/src/tests/databases/core_3/seq_region_attrib.txt b/src/tests/databases/core_3/seq_region_attrib.txt deleted file mode 100644 index d8dcda33..00000000 --- a/src/tests/databases/core_3/seq_region_attrib.txt +++ /dev/null @@ -1,6 +0,0 @@ -1 6 1 -2 6 1 -3 6 1 -1 547 nuclear_chromosome -2 547 nuclear_chromosome -3 547 mitochondrial_chromosome diff --git 
a/src/tests/databases/core_3/seq_region_synonym.txt b/src/tests/databases/core_3/seq_region_synonym.txt deleted file mode 100644 index de43d915..00000000 --- a/src/tests/databases/core_3/seq_region_synonym.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 1 TEST1_seq 50710 -2 2 TEST2_seq 50710 -3 3 TEST3_seq 50710 diff --git a/src/tests/databases/core_3/table.sql b/src/tests/databases/core_3/table.sql deleted file mode 100644 index 953da984..00000000 --- a/src/tests/databases/core_3/table.sql +++ /dev/null @@ -1,86 +0,0 @@ -CREATE TABLE coord_system -( - coord_system_id int unsigned auto_increment - primary key, - species_id int unsigned default 1 not null, - name varchar(40) not null, - version varchar(255) null, - `rank` int not null, - attrib set ('default_version', 'sequence_level') null, - constraint name_idx - unique (name, version, species_id), - constraint rank_idx - unique (`rank`, species_id) -); - -CREATE INDEX species_idx - on coord_system (species_id); - -CREATE TABLE meta -( - meta_id int auto_increment - primary key, - species_id int unsigned default 1 null, - meta_key varchar(40) not null, - meta_value varchar(255) not null, - constraint species_key_value_idx - unique (species_id, meta_key, meta_value) -); - -CREATE INDEX species_value_idx - on meta (species_id, meta_value); - -CREATE TABLE seq_region -( - seq_region_id int unsigned auto_increment - primary key, - name varchar(255) not null, - coord_system_id int unsigned not null, - length int unsigned not null, - constraint name_cs_idx - unique (name, coord_system_id) -); - -CREATE INDEX cs_idx - on seq_region (coord_system_id); - -CREATE TABLE seq_region_attrib -( - seq_region_id int unsigned default 0 not null, - attrib_type_id smallint unsigned default 0 not null, - value text not null, - constraint region_attribx - unique (seq_region_id, attrib_type_id, value(500)) -); - -CREATE INDEX seq_region_idx - on seq_region_attrib (seq_region_id); - -CREATE INDEX type_val_idx - on seq_region_attrib (attrib_type_id, 
value(40)); - -CREATE INDEX val_only_idx - on seq_region_attrib (value(40)); - -CREATE TABLE seq_region_synonym -( - seq_region_synonym_id int unsigned auto_increment - primary key, - seq_region_id int unsigned not null, - synonym varchar(250) not null, - external_db_id int unsigned null, - constraint syn_idx - unique (synonym, seq_region_id) -); - -CREATE INDEX seq_region_idx - on seq_region_synonym (seq_region_id); - -CREATE TABLE `attrib_type` ( - `attrib_type_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `code` varchar(20) NOT NULL DEFAULT '', - `name` varchar(255) NOT NULL DEFAULT '', - `description` text, - PRIMARY KEY (`attrib_type_id`), - UNIQUE KEY `code_idx` (`code`) -); \ No newline at end of file diff --git a/src/tests/databases/core_4/attrib_type.txt b/src/tests/databases/core_4/attrib_type.txt deleted file mode 100644 index de5f1880..00000000 --- a/src/tests/databases/core_4/attrib_type.txt +++ /dev/null @@ -1,2 +0,0 @@ -6 toplevel Top Level Top Level Non-Redundant Sequence Region -547 sequence_location sequence_location To identify sequence locations / cellular compartments that DNA sequence comes from.Values are supposed to be SO compliant (children of the plastid_sequence SO:0000740 and nuclear_sequence SO:0000738 ): "apicoplast_chromosome", "chloroplast_chromosome", "chromoplast_chromosome", "cyanelle_chromosome", "leucoplast_chromosome", "macronuclear_chromosome", "micronuclear_chromosome", "mitochondrial_chromosome", "nuclear_chromosome". 
diff --git a/src/tests/databases/core_4/coord_system.txt b/src/tests/databases/core_4/coord_system.txt deleted file mode 100644 index 51314bf1..00000000 --- a/src/tests/databases/core_4/coord_system.txt +++ /dev/null @@ -1 +0,0 @@ -1 1 primary_assembly test 1 default_version,sequence_level diff --git a/src/tests/databases/core_4/meta.txt b/src/tests/databases/core_4/meta.txt deleted file mode 100644 index c0ecec65..00000000 --- a/src/tests/databases/core_4/meta.txt +++ /dev/null @@ -1,23 +0,0 @@ -12 1 assembly.accession weird02 -14 1 assembly.default jaber01 -13 1 assembly.name jaber01 -11 1 assembly.ucsc_alias SCARYIER -15 1 gencode.version 999 -3 1 organism.common_name jabberwocky -7 1 organism.division Ensembl_TEST -6 1 organism.production_name Jabberwocky -4 1 organism.scientific_name carol_jabberwocky -1 1 organism.species_taxonomy_id 6666666 -8 1 organism.strain reference -9 1 organism.strain_group testing -2 1 organism.taxonomy_id 666668 -10 1 organism.type monsters -5 1 organism.url Jabbe -17 1 genebuild.version ENS02 -18 1 genebuild.sample_gene ENSAMXG00005000318 -19 1 genebuild.sample_location KB871578.1:9766653-9817473 -20 1 strain.type test -23 1 genebuild.provider_name test -24 1 genebuild.start_date 2023-07-Ensembl -25 \N schema_version 110 -26 1 genebuild.last_geneset_update 2023-01 diff --git a/src/tests/databases/core_4/seq_region.txt b/src/tests/databases/core_4/seq_region.txt deleted file mode 100644 index a2216feb..00000000 --- a/src/tests/databases/core_4/seq_region.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 TEST1_seq 1 666666 -2 TEST2_seq 1 666 -3 TEST3_seq 1 1666666 diff --git a/src/tests/databases/core_4/seq_region_attrib.txt b/src/tests/databases/core_4/seq_region_attrib.txt deleted file mode 100644 index d8dcda33..00000000 --- a/src/tests/databases/core_4/seq_region_attrib.txt +++ /dev/null @@ -1,6 +0,0 @@ -1 6 1 -2 6 1 -3 6 1 -1 547 nuclear_chromosome -2 547 nuclear_chromosome -3 547 mitochondrial_chromosome diff --git 
a/src/tests/databases/core_4/seq_region_synonym.txt b/src/tests/databases/core_4/seq_region_synonym.txt deleted file mode 100644 index de43d915..00000000 --- a/src/tests/databases/core_4/seq_region_synonym.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 1 TEST1_seq 50710 -2 2 TEST2_seq 50710 -3 3 TEST3_seq 50710 diff --git a/src/tests/databases/core_4/table.sql b/src/tests/databases/core_4/table.sql deleted file mode 100644 index 22e5c915..00000000 --- a/src/tests/databases/core_4/table.sql +++ /dev/null @@ -1,87 +0,0 @@ -CREATE TABLE coord_system -( - coord_system_id int unsigned auto_increment - primary key, - species_id int unsigned default 1 not null, - name varchar(40) not null, - version varchar(255) null, - `rank` int not null, - attrib set ('default_version', 'sequence_level') null, - constraint name_idx - unique (name, version, species_id), - constraint rank_idx - unique (`rank`, species_id) -); - -CREATE INDEX species_idx - on coord_system (species_id); - -CREATE TABLE meta -( - meta_id int auto_increment - primary key, - species_id int unsigned default 1 null, - meta_key varchar(40) not null, - meta_value varchar(255) not null, - constraint species_key_value_idx - unique (species_id, meta_key, meta_value) -); - -CREATE INDEX species_value_idx - on meta (species_id, meta_value); - -CREATE TABLE seq_region -( - seq_region_id int unsigned auto_increment - primary key, - name varchar(255) not null, - coord_system_id int unsigned not null, - length int unsigned not null, - constraint name_cs_idx - unique (name, coord_system_id) -); - -CREATE INDEX cs_idx - on seq_region (coord_system_id); - -CREATE TABLE seq_region_attrib -( - seq_region_id int unsigned default 0 not null, - attrib_type_id smallint unsigned default 0 not null, - value text not null, - constraint region_attribx - unique (seq_region_id, attrib_type_id, value(500)) -); - -CREATE INDEX seq_region_idx - on seq_region_attrib (seq_region_id); - -CREATE INDEX type_val_idx - on seq_region_attrib (attrib_type_id, 
value(40)); - -CREATE INDEX val_only_idx - on seq_region_attrib (value(40)); - -CREATE TABLE seq_region_synonym -( - seq_region_synonym_id int unsigned auto_increment - primary key, - seq_region_id int unsigned not null, - synonym varchar(250) not null, - external_db_id int unsigned null, - constraint syn_idx - unique (synonym, seq_region_id) -); - -CREATE INDEX seq_region_idx - on seq_region_synonym (seq_region_id); - -CREATE TABLE `attrib_type` ( - `attrib_type_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `code` varchar(20) NOT NULL DEFAULT '', - `name` varchar(255) NOT NULL DEFAULT '', - `description` text, - PRIMARY KEY (`attrib_type_id`), - UNIQUE KEY `code_idx` (`code`) -); - diff --git a/src/tests/databases/core_5/attrib_type.txt b/src/tests/databases/core_5/attrib_type.txt deleted file mode 100644 index de5f1880..00000000 --- a/src/tests/databases/core_5/attrib_type.txt +++ /dev/null @@ -1,2 +0,0 @@ -6 toplevel Top Level Top Level Non-Redundant Sequence Region -547 sequence_location sequence_location To identify sequence locations / cellular compartments that DNA sequence comes from.Values are supposed to be SO compliant (children of the plastid_sequence SO:0000740 and nuclear_sequence SO:0000738 ): "apicoplast_chromosome", "chloroplast_chromosome", "chromoplast_chromosome", "cyanelle_chromosome", "leucoplast_chromosome", "macronuclear_chromosome", "micronuclear_chromosome", "mitochondrial_chromosome", "nuclear_chromosome". 
diff --git a/src/tests/databases/core_5/coord_system.txt b/src/tests/databases/core_5/coord_system.txt deleted file mode 100644 index 51314bf1..00000000 --- a/src/tests/databases/core_5/coord_system.txt +++ /dev/null @@ -1 +0,0 @@ -1 1 primary_assembly test 1 default_version,sequence_level diff --git a/src/tests/databases/core_5/meta.txt b/src/tests/databases/core_5/meta.txt deleted file mode 100644 index 3432d48d..00000000 --- a/src/tests/databases/core_5/meta.txt +++ /dev/null @@ -1,20 +0,0 @@ -12 1 assembly.accession test1 -14 1 assembly.default test846 -13 1 assembly.name test1 -11 1 assembly.ucsc_alias test1 -7 1 organism.division Ensembl_TEST -6 1 organism.production_name test_case_5 -4 1 organism.scientific_name Hominoide -8 1 organism.strain reference -9 1 organism.strain_group Hominoide -2 1 organism.taxonomy_id 9940 -10 1 organism.type monsters -5 1 organism.url Hominoide -17 1 genebuild.version ENS01 -18 1 genebuild.sample_gene ENSAMXG00005000318 -19 1 genebuild.sample_location KB871578.1:9766653-9817473 -20 1 strain.type test -23 1 genebuild.provider_name removed_for_test -24 1 genebuild.start_date 2023-07-Ensembl -25 \N schema_version 110 -26 1 genebuild.last_geneset_update 2023-01 diff --git a/src/tests/databases/core_5/seq_region.txt b/src/tests/databases/core_5/seq_region.txt deleted file mode 100644 index a2216feb..00000000 --- a/src/tests/databases/core_5/seq_region.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 TEST1_seq 1 666666 -2 TEST2_seq 1 666 -3 TEST3_seq 1 1666666 diff --git a/src/tests/databases/core_5/seq_region_attrib.txt b/src/tests/databases/core_5/seq_region_attrib.txt deleted file mode 100644 index d8dcda33..00000000 --- a/src/tests/databases/core_5/seq_region_attrib.txt +++ /dev/null @@ -1,6 +0,0 @@ -1 6 1 -2 6 1 -3 6 1 -1 547 nuclear_chromosome -2 547 nuclear_chromosome -3 547 mitochondrial_chromosome diff --git a/src/tests/databases/core_5/seq_region_synonym.txt b/src/tests/databases/core_5/seq_region_synonym.txt deleted file mode 100644 
index de43d915..00000000 --- a/src/tests/databases/core_5/seq_region_synonym.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 1 TEST1_seq 50710 -2 2 TEST2_seq 50710 -3 3 TEST3_seq 50710 diff --git a/src/tests/databases/core_5/table.sql b/src/tests/databases/core_5/table.sql deleted file mode 100644 index 22e5c915..00000000 --- a/src/tests/databases/core_5/table.sql +++ /dev/null @@ -1,87 +0,0 @@ -CREATE TABLE coord_system -( - coord_system_id int unsigned auto_increment - primary key, - species_id int unsigned default 1 not null, - name varchar(40) not null, - version varchar(255) null, - `rank` int not null, - attrib set ('default_version', 'sequence_level') null, - constraint name_idx - unique (name, version, species_id), - constraint rank_idx - unique (`rank`, species_id) -); - -CREATE INDEX species_idx - on coord_system (species_id); - -CREATE TABLE meta -( - meta_id int auto_increment - primary key, - species_id int unsigned default 1 null, - meta_key varchar(40) not null, - meta_value varchar(255) not null, - constraint species_key_value_idx - unique (species_id, meta_key, meta_value) -); - -CREATE INDEX species_value_idx - on meta (species_id, meta_value); - -CREATE TABLE seq_region -( - seq_region_id int unsigned auto_increment - primary key, - name varchar(255) not null, - coord_system_id int unsigned not null, - length int unsigned not null, - constraint name_cs_idx - unique (name, coord_system_id) -); - -CREATE INDEX cs_idx - on seq_region (coord_system_id); - -CREATE TABLE seq_region_attrib -( - seq_region_id int unsigned default 0 not null, - attrib_type_id smallint unsigned default 0 not null, - value text not null, - constraint region_attribx - unique (seq_region_id, attrib_type_id, value(500)) -); - -CREATE INDEX seq_region_idx - on seq_region_attrib (seq_region_id); - -CREATE INDEX type_val_idx - on seq_region_attrib (attrib_type_id, value(40)); - -CREATE INDEX val_only_idx - on seq_region_attrib (value(40)); - -CREATE TABLE seq_region_synonym -( - 
seq_region_synonym_id int unsigned auto_increment - primary key, - seq_region_id int unsigned not null, - synonym varchar(250) not null, - external_db_id int unsigned null, - constraint syn_idx - unique (synonym, seq_region_id) -); - -CREATE INDEX seq_region_idx - on seq_region_synonym (seq_region_id); - -CREATE TABLE `attrib_type` ( - `attrib_type_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `code` varchar(20) NOT NULL DEFAULT '', - `name` varchar(255) NOT NULL DEFAULT '', - `description` text, - PRIMARY KEY (`attrib_type_id`), - UNIQUE KEY `code_idx` (`code`) -); - diff --git a/src/tests/databases/core_6/attrib_type.txt b/src/tests/databases/core_6/attrib_type.txt deleted file mode 100644 index de5f1880..00000000 --- a/src/tests/databases/core_6/attrib_type.txt +++ /dev/null @@ -1,2 +0,0 @@ -6 toplevel Top Level Top Level Non-Redundant Sequence Region -547 sequence_location sequence_location To identify sequence locations / cellular compartments that DNA sequence comes from.Values are supposed to be SO compliant (children of the plastid_sequence SO:0000740 and nuclear_sequence SO:0000738 ): "apicoplast_chromosome", "chloroplast_chromosome", "chromoplast_chromosome", "cyanelle_chromosome", "leucoplast_chromosome", "macronuclear_chromosome", "micronuclear_chromosome", "mitochondrial_chromosome", "nuclear_chromosome". 
diff --git a/src/tests/databases/core_6/coord_system.txt b/src/tests/databases/core_6/coord_system.txt deleted file mode 100644 index 51314bf1..00000000 --- a/src/tests/databases/core_6/coord_system.txt +++ /dev/null @@ -1 +0,0 @@ -1 1 primary_assembly test 1 default_version,sequence_level diff --git a/src/tests/databases/core_6/meta.txt b/src/tests/databases/core_6/meta.txt deleted file mode 100644 index e86f1e13..00000000 --- a/src/tests/databases/core_6/meta.txt +++ /dev/null @@ -1,25 +0,0 @@ -12 1 assembly.accession weird01 -14 1 assembly.default jaber01 -13 1 assembly.name jaber01 -11 1 assembly.ucsc_alias SCARY -15 1 gencode.version 999 -16 1 genebuild.last_geneset_update 01 -3 1 organism.common_name jabberwocky -7 1 organism.division Ensembl_TEST -6 1 organism.production_name Jabberwocky -4 1 organism.scientific_name carol_jabberwocky -1 1 organism.species_taxonomy_id 6666666 -8 1 organism.strain reference -9 1 organism.strain_group testing -2 1 organism.taxonomy_id 666668 -10 1 organism.type monsters -5 1 organism.url Jabbe -17 1 genebuild.version ENS01 -18 1 genebuild.sample_gene ENSAMXG00005000318 -19 1 genebuild.sample_location KB871578.1:9766653-9817473 -20 1 strain.type test -21 1 genome.genome_uuid 90720316-006c-470b-a7dd-82d28f952264 -23 1 genebuild.provider_name test -24 1 genebuild.start_date 2023-07-Ensembl -25 \N schema_version 110 -26 1 genebuild.last_geneset_update 2023-01 diff --git a/src/tests/databases/core_6/seq_region.txt b/src/tests/databases/core_6/seq_region.txt deleted file mode 100644 index a2216feb..00000000 --- a/src/tests/databases/core_6/seq_region.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 TEST1_seq 1 666666 -2 TEST2_seq 1 666 -3 TEST3_seq 1 1666666 diff --git a/src/tests/databases/core_6/seq_region_attrib.txt b/src/tests/databases/core_6/seq_region_attrib.txt deleted file mode 100644 index d8dcda33..00000000 --- a/src/tests/databases/core_6/seq_region_attrib.txt +++ /dev/null @@ -1,6 +0,0 @@ -1 6 1 -2 6 1 -3 6 1 -1 547 
nuclear_chromosome -2 547 nuclear_chromosome -3 547 mitochondrial_chromosome diff --git a/src/tests/databases/core_6/seq_region_synonym.txt b/src/tests/databases/core_6/seq_region_synonym.txt deleted file mode 100644 index de43d915..00000000 --- a/src/tests/databases/core_6/seq_region_synonym.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 1 TEST1_seq 50710 -2 2 TEST2_seq 50710 -3 3 TEST3_seq 50710 diff --git a/src/tests/databases/core_6/table.sql b/src/tests/databases/core_6/table.sql deleted file mode 100644 index 953da984..00000000 --- a/src/tests/databases/core_6/table.sql +++ /dev/null @@ -1,86 +0,0 @@ -CREATE TABLE coord_system -( - coord_system_id int unsigned auto_increment - primary key, - species_id int unsigned default 1 not null, - name varchar(40) not null, - version varchar(255) null, - `rank` int not null, - attrib set ('default_version', 'sequence_level') null, - constraint name_idx - unique (name, version, species_id), - constraint rank_idx - unique (`rank`, species_id) -); - -CREATE INDEX species_idx - on coord_system (species_id); - -CREATE TABLE meta -( - meta_id int auto_increment - primary key, - species_id int unsigned default 1 null, - meta_key varchar(40) not null, - meta_value varchar(255) not null, - constraint species_key_value_idx - unique (species_id, meta_key, meta_value) -); - -CREATE INDEX species_value_idx - on meta (species_id, meta_value); - -CREATE TABLE seq_region -( - seq_region_id int unsigned auto_increment - primary key, - name varchar(255) not null, - coord_system_id int unsigned not null, - length int unsigned not null, - constraint name_cs_idx - unique (name, coord_system_id) -); - -CREATE INDEX cs_idx - on seq_region (coord_system_id); - -CREATE TABLE seq_region_attrib -( - seq_region_id int unsigned default 0 not null, - attrib_type_id smallint unsigned default 0 not null, - value text not null, - constraint region_attribx - unique (seq_region_id, attrib_type_id, value(500)) -); - -CREATE INDEX seq_region_idx - on 
seq_region_attrib (seq_region_id); - -CREATE INDEX type_val_idx - on seq_region_attrib (attrib_type_id, value(40)); - -CREATE INDEX val_only_idx - on seq_region_attrib (value(40)); - -CREATE TABLE seq_region_synonym -( - seq_region_synonym_id int unsigned auto_increment - primary key, - seq_region_id int unsigned not null, - synonym varchar(250) not null, - external_db_id int unsigned null, - constraint syn_idx - unique (synonym, seq_region_id) -); - -CREATE INDEX seq_region_idx - on seq_region_synonym (seq_region_id); - -CREATE TABLE `attrib_type` ( - `attrib_type_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `code` varchar(20) NOT NULL DEFAULT '', - `name` varchar(255) NOT NULL DEFAULT '', - `description` text, - PRIMARY KEY (`attrib_type_id`), - UNIQUE KEY `code_idx` (`code`) -); \ No newline at end of file diff --git a/src/tests/databases/core_7/attrib_type.txt b/src/tests/databases/core_7/attrib_type.txt deleted file mode 100644 index 59c569dd..00000000 --- a/src/tests/databases/core_7/attrib_type.txt +++ /dev/null @@ -1,3 +0,0 @@ -6 toplevel Top Level Top Level Non-Redundant Sequence Region -547 sequence_location sequence_location To identify sequence locations / cellular compartments that DNA sequence comes from.Values are supposed to be SO compliant (children of the plastid_sequence SO:0000740 and nuclear_sequence SO:0000738 ): "apicoplast_chromosome", "chloroplast_chromosome", "chromoplast_chromosome", "cyanelle_chromosome", "leucoplast_chromosome", "macronuclear_chromosome", "micronuclear_chromosome", "mitochondrial_chromosome", "nuclear_chromosome". 
-316 circular_seq Circular sequence Circular chromosome or plasmid molecule \ No newline at end of file diff --git a/src/tests/databases/core_7/coord_system.txt b/src/tests/databases/core_7/coord_system.txt deleted file mode 100644 index 51314bf1..00000000 --- a/src/tests/databases/core_7/coord_system.txt +++ /dev/null @@ -1 +0,0 @@ -1 1 primary_assembly test 1 default_version,sequence_level diff --git a/src/tests/databases/core_7/meta.txt b/src/tests/databases/core_7/meta.txt deleted file mode 100644 index 4ce72caa..00000000 --- a/src/tests/databases/core_7/meta.txt +++ /dev/null @@ -1,26 +0,0 @@ -12 1 assembly.accession test1 -14 1 assembly.default NewTest -13 1 assembly.name jaber01 -11 1 assembly.ucsc_alias test_alias -15 1 gencode.version 999 -16 1 genebuild.last_geneset_update 01 -3 1 organism.common_name jabberwocky -7 1 organism.division Ensembl_TEST -6 1 organism.production_name Jabberwocky -4 1 organism.scientific_name carol_jabberwocky -1 1 organism.species_taxonomy_id 6666666 -8 1 organism.strain reference -9 1 organism.strain_group testing -2 1 organism.taxonomy_id 666668 -10 1 organism.type monsters -5 1 organism.url Jabbe -17 1 genebuild.version ENS01 -18 1 genebuild.sample_gene ENSAMXG00005000318 -19 1 genebuild.sample_location KB871578.1:9766653-9817473 -20 1 strain.type test -23 1 genebuild.provider_name testfornamenew -24 1 genebuild.start_date 2023-08-Ensembl -25 1 genebuild.havana_datafreeze_date test2 -26 \N schema_version 110 -27 1 assembly.stats.total_coding_sequence_length 8989 -28 1 genebuild.last_geneset_update 2023-01 diff --git a/src/tests/databases/core_7/seq_region.txt b/src/tests/databases/core_7/seq_region.txt deleted file mode 100644 index 535c1393..00000000 --- a/src/tests/databases/core_7/seq_region.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 TEST1_seq_update 1 666666 -2 TEST2_seq_update 1 666 -3 TEST3_seq_update 1 1666666 diff --git a/src/tests/databases/core_7/seq_region_attrib.txt b/src/tests/databases/core_7/seq_region_attrib.txt 
deleted file mode 100644 index aad2591e..00000000 --- a/src/tests/databases/core_7/seq_region_attrib.txt +++ /dev/null @@ -1,8 +0,0 @@ -1 6 1 -2 6 1 -3 6 1 -1 547 nuclear_chromosome -2 547 nuclear_chromosome -3 547 mitochondrial_chromosome -1 316 1 -2 316 0 \ No newline at end of file diff --git a/src/tests/databases/core_7/seq_region_synonym.txt b/src/tests/databases/core_7/seq_region_synonym.txt deleted file mode 100644 index de43d915..00000000 --- a/src/tests/databases/core_7/seq_region_synonym.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 1 TEST1_seq 50710 -2 2 TEST2_seq 50710 -3 3 TEST3_seq 50710 diff --git a/src/tests/databases/core_7/table.sql b/src/tests/databases/core_7/table.sql deleted file mode 100644 index 953da984..00000000 --- a/src/tests/databases/core_7/table.sql +++ /dev/null @@ -1,86 +0,0 @@ -CREATE TABLE coord_system -( - coord_system_id int unsigned auto_increment - primary key, - species_id int unsigned default 1 not null, - name varchar(40) not null, - version varchar(255) null, - `rank` int not null, - attrib set ('default_version', 'sequence_level') null, - constraint name_idx - unique (name, version, species_id), - constraint rank_idx - unique (`rank`, species_id) -); - -CREATE INDEX species_idx - on coord_system (species_id); - -CREATE TABLE meta -( - meta_id int auto_increment - primary key, - species_id int unsigned default 1 null, - meta_key varchar(40) not null, - meta_value varchar(255) not null, - constraint species_key_value_idx - unique (species_id, meta_key, meta_value) -); - -CREATE INDEX species_value_idx - on meta (species_id, meta_value); - -CREATE TABLE seq_region -( - seq_region_id int unsigned auto_increment - primary key, - name varchar(255) not null, - coord_system_id int unsigned not null, - length int unsigned not null, - constraint name_cs_idx - unique (name, coord_system_id) -); - -CREATE INDEX cs_idx - on seq_region (coord_system_id); - -CREATE TABLE seq_region_attrib -( - seq_region_id int unsigned default 0 not null, - 
attrib_type_id smallint unsigned default 0 not null, - value text not null, - constraint region_attribx - unique (seq_region_id, attrib_type_id, value(500)) -); - -CREATE INDEX seq_region_idx - on seq_region_attrib (seq_region_id); - -CREATE INDEX type_val_idx - on seq_region_attrib (attrib_type_id, value(40)); - -CREATE INDEX val_only_idx - on seq_region_attrib (value(40)); - -CREATE TABLE seq_region_synonym -( - seq_region_synonym_id int unsigned auto_increment - primary key, - seq_region_id int unsigned not null, - synonym varchar(250) not null, - external_db_id int unsigned null, - constraint syn_idx - unique (synonym, seq_region_id) -); - -CREATE INDEX seq_region_idx - on seq_region_synonym (seq_region_id); - -CREATE TABLE `attrib_type` ( - `attrib_type_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `code` varchar(20) NOT NULL DEFAULT '', - `name` varchar(255) NOT NULL DEFAULT '', - `description` text, - PRIMARY KEY (`attrib_type_id`), - UNIQUE KEY `code_idx` (`code`) -); \ No newline at end of file diff --git a/src/tests/databases/core_8/attrib_type.txt b/src/tests/databases/core_8/attrib_type.txt deleted file mode 100644 index 59c569dd..00000000 --- a/src/tests/databases/core_8/attrib_type.txt +++ /dev/null @@ -1,3 +0,0 @@ -6 toplevel Top Level Top Level Non-Redundant Sequence Region -547 sequence_location sequence_location To identify sequence locations / cellular compartments that DNA sequence comes from.Values are supposed to be SO compliant (children of the plastid_sequence SO:0000740 and nuclear_sequence SO:0000738 ): "apicoplast_chromosome", "chloroplast_chromosome", "chromoplast_chromosome", "cyanelle_chromosome", "leucoplast_chromosome", "macronuclear_chromosome", "micronuclear_chromosome", "mitochondrial_chromosome", "nuclear_chromosome". 
-316 circular_seq Circular sequence Circular chromosome or plasmid molecule \ No newline at end of file diff --git a/src/tests/databases/core_8/coord_system.txt b/src/tests/databases/core_8/coord_system.txt deleted file mode 100644 index 51314bf1..00000000 --- a/src/tests/databases/core_8/coord_system.txt +++ /dev/null @@ -1 +0,0 @@ -1 1 primary_assembly test 1 default_version,sequence_level diff --git a/src/tests/databases/core_8/meta.txt b/src/tests/databases/core_8/meta.txt deleted file mode 100644 index 725a1ac8..00000000 --- a/src/tests/databases/core_8/meta.txt +++ /dev/null @@ -1,25 +0,0 @@ -12 1 assembly.accession GCA_000002985.3 -14 1 assembly.default jaber01 -13 1 assembly.name jaber01 -11 1 assembly.ucsc_alias SCARY -15 1 gencode.version 999 -16 1 genebuild.last_geneset_update 01 -3 1 organism.biosample_id SAMN04256190 -3 1 organism.common_name Caenorhabditis elegans (PRJNA13758) -7 1 organism.division Ensembl_TEST -6 1 organism.production_name Caenorhabditis_elegans -4 1 organism.scientific_name Caenorhabditis elegans -1 1 organism.species_taxonomy_id 6239 -8 1 organism.strain N2 -9 1 organism.strain_group testing -2 1 organism.taxonomy_id 6239 -10 1 organism.type monsters -5 1 organism.url Jabbe -17 1 genebuild.version EXT01 -18 1 genebuild.sample_gene ENSAMXG00005000318 -19 1 genebuild.sample_location KB871578.1:9766653-9817473 -20 1 strain.type test -23 1 genebuild.provider_name test -24 1 genebuild.start_date 2023-07-Ensembl -25 \N schema_version 110 -29 1 genebuild.last_geneset_update 2023-01 diff --git a/src/tests/databases/core_8/seq_region.txt b/src/tests/databases/core_8/seq_region.txt deleted file mode 100644 index e67ee5a7..00000000 --- a/src/tests/databases/core_8/seq_region.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 TEST1_seqA 1 666666 -2 TEST2_seqB 1 666 -3 TEST3_seqC 1 1666666 diff --git a/src/tests/databases/core_8/seq_region_attrib.txt b/src/tests/databases/core_8/seq_region_attrib.txt deleted file mode 100644 index aad2591e..00000000 --- 
a/src/tests/databases/core_8/seq_region_attrib.txt +++ /dev/null @@ -1,8 +0,0 @@ -1 6 1 -2 6 1 -3 6 1 -1 547 nuclear_chromosome -2 547 nuclear_chromosome -3 547 mitochondrial_chromosome -1 316 1 -2 316 0 \ No newline at end of file diff --git a/src/tests/databases/core_8/seq_region_synonym.txt b/src/tests/databases/core_8/seq_region_synonym.txt deleted file mode 100644 index de43d915..00000000 --- a/src/tests/databases/core_8/seq_region_synonym.txt +++ /dev/null @@ -1,3 +0,0 @@ -1 1 TEST1_seq 50710 -2 2 TEST2_seq 50710 -3 3 TEST3_seq 50710 diff --git a/src/tests/databases/core_8/table.sql b/src/tests/databases/core_8/table.sql deleted file mode 100644 index 953da984..00000000 --- a/src/tests/databases/core_8/table.sql +++ /dev/null @@ -1,86 +0,0 @@ -CREATE TABLE coord_system -( - coord_system_id int unsigned auto_increment - primary key, - species_id int unsigned default 1 not null, - name varchar(40) not null, - version varchar(255) null, - `rank` int not null, - attrib set ('default_version', 'sequence_level') null, - constraint name_idx - unique (name, version, species_id), - constraint rank_idx - unique (`rank`, species_id) -); - -CREATE INDEX species_idx - on coord_system (species_id); - -CREATE TABLE meta -( - meta_id int auto_increment - primary key, - species_id int unsigned default 1 null, - meta_key varchar(40) not null, - meta_value varchar(255) not null, - constraint species_key_value_idx - unique (species_id, meta_key, meta_value) -); - -CREATE INDEX species_value_idx - on meta (species_id, meta_value); - -CREATE TABLE seq_region -( - seq_region_id int unsigned auto_increment - primary key, - name varchar(255) not null, - coord_system_id int unsigned not null, - length int unsigned not null, - constraint name_cs_idx - unique (name, coord_system_id) -); - -CREATE INDEX cs_idx - on seq_region (coord_system_id); - -CREATE TABLE seq_region_attrib -( - seq_region_id int unsigned default 0 not null, - attrib_type_id smallint unsigned default 0 not null, - 
value text not null, - constraint region_attribx - unique (seq_region_id, attrib_type_id, value(500)) -); - -CREATE INDEX seq_region_idx - on seq_region_attrib (seq_region_id); - -CREATE INDEX type_val_idx - on seq_region_attrib (attrib_type_id, value(40)); - -CREATE INDEX val_only_idx - on seq_region_attrib (value(40)); - -CREATE TABLE seq_region_synonym -( - seq_region_synonym_id int unsigned auto_increment - primary key, - seq_region_id int unsigned not null, - synonym varchar(250) not null, - external_db_id int unsigned null, - constraint syn_idx - unique (synonym, seq_region_id) -); - -CREATE INDEX seq_region_idx - on seq_region_synonym (seq_region_id); - -CREATE TABLE `attrib_type` ( - `attrib_type_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, - `code` varchar(20) NOT NULL DEFAULT '', - `name` varchar(255) NOT NULL DEFAULT '', - `description` text, - PRIMARY KEY (`attrib_type_id`), - UNIQUE KEY `code_idx` (`code`) -); \ No newline at end of file diff --git a/src/tests/databases/dump_mysql_db.py b/src/tests/databases/dump_mysql_db.py deleted file mode 100644 index c4cc1c8f..00000000 --- a/src/tests/databases/dump_mysql_db.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 -""" -Dump MySQL database to table.sql and .txt files. -Creates the same format that load_mysql_db.py expects. 
-""" - -import argparse -import csv -from pathlib import Path -from urllib.parse import urlparse - -import mysql.connector -from mysql.connector import Error - - -def parse_mysql_uri(uri): - """Parse MySQL URI and return connection parameters.""" - parsed = urlparse(uri) - - return { - "host": parsed.hostname, - "port": parsed.port or 3306, - "user": parsed.username, - "password": parsed.password, - "database": parsed.path.lstrip("/"), - } - - -def get_table_create_statement(cursor, table_name): - """Get the CREATE TABLE statement for a table.""" - cursor.execute(f"SHOW CREATE TABLE `{table_name}`") - result = cursor.fetchone() - return result[1] # Second column is the CREATE TABLE statement - - -def dump_schema(cursor, output_dir): - """Dump all table schemas to table.sql.""" - # Get all tables in database - cursor.execute("SHOW TABLES") - tables = [row[0] for row in cursor.fetchall()] - - schema_file = output_dir / "table.sql" - - with open(schema_file, "w", encoding="utf-8") as f: - for table_name in tables: - create_stmt = get_table_create_statement(cursor, table_name) - f.write(create_stmt) - f.write(";\n\n") - - print(f"✓ Exported schema for {len(tables)} tables to {schema_file}") - return tables - - -def dump_table_data(cursor, table_name, output_dir): - """Dump table data to a tab-separated .txt file.""" - output_file = output_dir / f"{table_name}.txt" - - # Get all data from table - cursor.execute(f"SELECT * FROM `{table_name}`") - rows = cursor.fetchall() - - if not rows: - # Create empty file for consistency - output_file.touch() - return 0 - - # Write to TSV file - with open(output_file, "w", encoding="utf-8", newline="") as f: - writer = csv.writer(f, delimiter="\t", lineterminator="\n") - - for row in rows: - # Convert None to \N (MySQL NULL representation) - converted_row = ["\\N" if val is None else str(val) for val in row] - writer.writerow(converted_row) - - return len(rows) - - -def dump_database(mysql_url, output_dir, overwrite=False): - """ - 
Dump MySQL database to table.sql and .txt files. - - Args: - mysql_url: MySQL connection URL (mysql://user:pass@host:port/database) - output_dir: Output directory for schema and data files - overwrite: Whether to overwrite existing directory - """ - output_path = Path(output_dir) - - # Check if output directory exists - if output_path.exists(): - if not overwrite: - print(f"✗ Error: Directory {output_dir} already exists. Use --overwrite to replace it.") - return False - print(f"⚠ Overwriting existing directory: {output_dir}") - else: - output_path.mkdir(parents=True, exist_ok=True) - print(f"✓ Created output directory: {output_dir}") - - # Parse connection parameters - try: - conn_params = parse_mysql_uri(mysql_url) - db_name = conn_params["database"] - - if not db_name: - print("✗ Error: No database specified in URL") - print("Expected format: mysql://user:password@host:port/database_name") - return False - - print(f"\nDumping database: {db_name}") - print(f"MySQL Server: {conn_params['host']}:{conn_params['port']}") - print(f"Output directory: {output_dir}\n") - except Exception as e: - print(f"✗ Error parsing MySQL URI: {e}") - print("Expected format: mysql://user:password@host:port/database_name") - return False - - try: - # Connect to MySQL - conn = mysql.connector.connect(**conn_params) - cursor = conn.cursor() - - # Dump schema - tables = dump_schema(cursor, output_path) - - # Dump data for each table - print("\nExporting table data...") - total_rows = 0 - - for table_name in tables: - rows = dump_table_data(cursor, table_name, output_path) - total_rows += rows - print(f" ✓ {table_name}: {rows} rows") - - cursor.close() - conn.close() - - print(f"\n{'=' * 60}") - print(f"✓ Successfully dumped database: {db_name}") - print(f" - Schema: table.sql") - print(f" - Data: {len(tables)} tables, {total_rows} total rows") - print(f" - Location: {output_path.absolute()}") - return True - - except Error as e: - print(f"✗ MySQL Error: {e}") - return False - except 
Exception as e: - print(f"✗ Error: {e}") - return False - - -def main(): - parser = argparse.ArgumentParser( - description="Dump MySQL database to table.sql and .txt data files", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - %(prog)s mysql://user:pass@host:port/my_database ./output_dir - %(prog)s mysql://ensadmin:ensembl@mysql-server:4508/test_core_1 databases/core_1 - %(prog)s mysql://user:pass@host/testdb ./testdb --overwrite - -The script creates: - - table.sql: Complete schema for all tables - - .txt: Tab-separated data for each table (no headers) - -This format is compatible with load_mysql_db.py for re-importing. - """, - ) - parser.add_argument("mysql_url", help="MySQL connection URL (mysql://user:password@host:port/database)") - parser.add_argument("output_dir", help="Output directory for schema and data files") - parser.add_argument( - "-o", "--overwrite", action="store_true", help="Overwrite output directory if it exists" - ) - - args = parser.parse_args() - - success = dump_database(args.mysql_url, args.output_dir, args.overwrite) - exit(0 if success else 1) - - -if __name__ == "__main__": - main() diff --git a/src/tests/databases/ensembl_genome_metadata/assembly.txt b/src/tests/databases/ensembl_genome_metadata/assembly.txt deleted file mode 100644 index 573ea4fe..00000000 --- a/src/tests/databases/ensembl_genome_metadata/assembly.txt +++ /dev/null @@ -1,19 +0,0 @@ -1 \N GCA_000005845.2 chromosome ASM584v2 \N ASM584v2 \N 2023-09-22 15:01:43 ASM584v2 532aa68f-6500-404e-a470-8afb718a770a 1 -4 \N GCA_018473315.1 primary_assembly HG03540.alt.pat.f1_v2 \N HG03540.alt.pat.f1_v2 \N 2023-09-22 15:02:00 HG03540.alt.pat.f1_v2 9d2dc346-358a-4c70-8fd8-3ff194246a76 0 -5 \N GCA_018469415.1 primary_assembly HG03516.alt.pat.f1_v2 \N HG03516.alt.pat.f1_v2 \N 2023-09-22 15:02:01 HG03516.alt.pat.f1_v2 1551e511-bde7-40cf-95cd-de4059678c6f 0 -6 \N GCA_018469875.1 primary_assembly HG02622.pri.mat.f1_v2 \N HG02622.pri.mat.f1_v2 \N 
2023-09-22 15:02:02 HG02622.pri.mat.f1_v2 960de156-eced-4916-ac64-263d9a89dc3b 0 -7 \N GCA_018505825.1 primary_assembly HG02109.pri.mat.f1_v2 \N HG02109.pri.mat.f1_v2 \N 2023-09-22 15:02:04 HG02109.pri.mat.f1_v2 fc4e0ec5-7230-44b9-92aa-6788356158a8 0 -9 \N GCA_018852615.1 primary_assembly HG002.pri.mat.f1_v2 \N HG002.pri.mat.f1_v2 \N 2023-09-22 15:02:11 HG002.pri.mat.f1_v2 96b3f68d-d3d2-4107-a003-39cb0d67075f 0 -15 \N GCA_000002765.2 chromosome ASM276v2 \N ASM276v2 \N 2023-09-22 15:03:01 ASM276v2 23d2caa4-5120-4cc7-a73a-42aad4b6b1d9 1 -18 \N GCA_021950905.1 primary_assembly HG002.pat.cur.20211005 \N HG002.pat.cur.20211005 \N 2023-09-22 15:03:01 HG002.pat.cur.20211005 7a191f4e-0840-4aed-9302-8fab1157a361 0 -40 hg19 GCA_000001405.14 chromosome GRCh37.p13 \N GRCh37 \N 2023-09-22 15:03:21 GRCh37.p13 9d6b239c-46dd-4c79-bc29-1089f348d31d 0 -79 \N GCA_900519105.1 chromosome IWGSC \N IWGSC \N 2023-09-22 15:04:29 IWGSC 36d6c4f3-8072-4ae3-a485-84a070e725e3 1 -92 hg38 GCA_000001405.29 chromosome GRCh38.p14 \N GRCh38 \N 2023-09-22 15:04:45 GRCh38.p14 fd7fea38-981a-4d73-a879-6f9daef86f08 1 -97 \N GCA_018505865.1 primary_assembly HG02109.alt.pat.f1_v2 \N HG02109.alt.pat.f1_v2 \N 2023-09-22 15:04:50 HG02109.alt.pat.f1_v2 373c34c3-d482-4ebb-8f48-baee7c548583 0 -100 \N GCA_018852605.1 primary_assembly HG002.alt.pat.f1_v2 \N HG002.alt.pat.f1_v2 \N 2023-09-22 15:04:53 HG002.alt.pat.f1_v2 b6883d52-cc9d-43d2-bdbd-97bdaf903cec 0 -107 \N GCA_018469925.1 primary_assembly HG02622.alt.pat.f1_v2 \N HG02622.alt.pat.f1_v2 \N 2023-09-22 15:04:56 HG02622.alt.pat.f1_v2 a283efd6-d125-47df-8b3c-4757ae496231 0 -135 \N GCA_018469425.1 primary_assembly HG03516.pri.mat.f1_v2 \N HG03516.pri.mat.f1_v2 \N 2023-09-22 15:05:37 HG03516.pri.mat.f1_v2 93f7cd36-49e3-4c89-826b-3b2e4be0c40a 0 -180 \N GCA_021951015.1 primary_assembly HG002.mat.cur.20211005 \N HG002.mat.cur.20211005 \N 2023-09-22 15:06:39 HG002.mat.cur.20211005 696aa33e-f239-460e-9fcc-b6bb6908d726 0 -186 \N GCA_018473295.1 primary_assembly 
HG03540.pri.mat.f1_v2 \N HG03540.pri.mat.f1_v2 \N 2023-09-22 15:06:43 HG03540.pri.mat.f1_v2 8c71dc33-a49f-4be3-a3ad-4404fb374344 0 -216 \N GCA_000146045.2 chromosome R64-1-1 \N R64-1-1 \N 2023-09-22 15:06:55 R64-1-1 86cb493f-57cf-4c5a-8358-ef69952baf03 1 -219 \N GCA_000002985.3 chromosome WBcel235 \N WBcel235 \N 2023-09-22 15:06:58 WBcel235 2598e56f-a579-4fec-9525-0939563056bd 1 diff --git a/src/tests/databases/ensembl_genome_metadata/assembly_sequence.txt b/src/tests/databases/ensembl_genome_metadata/assembly_sequence.txt deleted file mode 100644 index a43b36a6..00000000 --- a/src/tests/databases/ensembl_genome_metadata/assembly_sequence.txt +++ /dev/null @@ -1,113 +0,0 @@ -1871 1 1 1 249250621 SO:0000738 1b22b98cdeb4a9304cb5d48026a85128 40 1 S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU 0 primary_assembly 0 \N -1872 10 10 1 135534747 SO:0000738 988c28e000e84c26d552359af1ea2e1d 40 10 -BOZ8Esn8J88qDwNiSEwUr5425UXdiGX 0 primary_assembly 0 \N -1873 11 11 1 135006516 SO:0000738 98c59049a2df285c76ffb1c6db8f8b96 40 11 XXi2_O1ly-CCOi3HP5TypAw7LtC6niFG 0 primary_assembly 0 \N -1874 12 12 1 133851895 SO:0000738 51851ac0e1a115847ad36449b0015864 40 12 105bBysLoDFQHhajooTAUyUkNiZ8LJEH 0 primary_assembly 0 \N -1875 13 13 1 115169878 SO:0000738 283f8d7892baa81b510a015719ca7b0b 40 13 Ewb9qlgTqN6e_XQiRVYpoUfZJHXeiUfH 0 primary_assembly 0 \N -1876 14 14 1 107349540 SO:0000738 98f3cae32b2a2e9524bc19813927542e 40 14 5Ji6FGEKfejK1U6BMScqrdKJK8GqmIGf 0 primary_assembly 0 \N -1877 15 15 1 102531392 SO:0000738 e5645a794a8238215b2cd77acb95a078 40 15 zIMZb3Ft7RdWa5XYq0PxIlezLY2ccCgt 0 primary_assembly 0 \N -1878 16 16 1 90354753 SO:0000738 fc9b1a7b42b97a864f56b348b06095e6 40 16 W6wLoIFOn4G7cjopxPxYNk2lcEqhLQFb 0 primary_assembly 0 \N -1879 17 17 1 81195210 SO:0000738 351f64d4f4f9ddd45b35336ad97aa6de 40 17 AjWXsI7AkTK35XW9pgd3UbjpC3MAevlz 0 primary_assembly 0 \N -1880 18 18 1 78077248 SO:0000738 b15d4b2d29dde9d3e4f93d1d0f2cbc9c 40 18 BTj4BDaaHYoPhD3oY2GdwC_l0uqZ92UD 0 primary_assembly 0 \N -1893 
GL000191.1 GL000191.1 0 106433 SO:0000738 d75b436f50a8214ee9c2a51d30b2c2cc 40 \N aX54PSRCZbj0EVn5QAH4zoO72gsmSTO8 0 primary_assembly 0 \N -1894 GL000192.1 GL000192.1 0 547496 SO:0000738 325ba9e808f669dfeee210fdd7b470ac 40 \N udMJQEKIyWPe8YGW1Dws6IHk_1NbkY9Y 0 primary_assembly 0 \N -1895 GL000193.1 GL000193.1 0 189789 SO:0000738 dbb6e8ece0b5de29da56601613007c2a 40 \N be3_RQlT0dXc4jYLkbEiRC6HSl7u1FjF 0 primary_assembly 0 \N -1896 GL000194.1 GL000194.1 0 191469 SO:0000738 6ac8f815bf8e845bb3031b73f812c012 40 \N WyYCLC4VxJvbBz2b_wBWF5BdQotiUVdB 0 primary_assembly 0 \N -1897 GL000195.1 GL000195.1 0 182896 SO:0000738 5d9ec007868d517e73543b005ba48535 40 \N 2LEWMcieZGf9Sx4VpEeWSDcULUVHGm0w 0 primary_assembly 0 \N -1955 1 1 1 248956422 SO:0000738 2648ae1bacce4ec4b6cf337dcae37816 92 1 2YnepKM7OkBoOrKmvHbGqguVfF9amCST 0 primary_assembly 0 \N -1956 10 10 1 133797422 SO:0000738 907112d17fcb73bcab1ed1c72b97ce68 92 10 P6q4sxSkFfKZpUgEwW73rx2a2ZYY-_pH 0 primary_assembly 0 \N -1957 11 11 1 135086622 SO:0000738 1511375dc2dd1b633af8cf439ae90cec 92 11 2NkFm8HK88MqeNkCgj78KidCAXgnsfV1 0 primary_assembly 0 \N -1958 12 12 1 133275309 SO:0000738 e81e16d3f44337034695a29b97708fce 92 12 7dzBrNZj_CM_Dg7zLl--e18KI8wVUxEd 0 primary_assembly 0 \N -1959 13 13 1 114364328 SO:0000738 17dab79b963ccd8e7377cef59a54fe1c 92 13 0qw_sn8Cl7OmMTFlukjFD2DUejW0T80Y 0 primary_assembly 0 \N -1960 14 14 1 107043718 SO:0000738 acbd9552c059d9b403e75ed26c1ce5bc 92 14 eK4D2MosgK_ivBkgi6FVPg5UXs1bYESm 0 primary_assembly 0 \N -1961 15 15 1 101991189 SO:0000738 f036bd11158407596ca6bf3581454706 92 15 AsXvWL1-2i5U_buw6_niVIxD6zTbAuS6 0 primary_assembly 0 \N -1962 16 16 1 90338345 SO:0000738 24e7cabfba3548a2bb4dff582b9ee870 92 16 EjrUp_S2oCd2b_SdqeZeOYUkEC966iVh 0 primary_assembly 0 \N -1963 17 17 1 83257441 SO:0000738 a8499ca51d6fb77332c2d242923994eb 92 17 upqChCoU-Gtd_61IidCsln-r8cxUTFeP 0 primary_assembly 0 \N -1964 18 18 1 80373285 SO:0000738 11eeaa801f6b0e2e36a1138616b8ee9a 92 18 vWwFhJ5lQDMhh-czg06YtlWqu0lvFAZV 0 
primary_assembly 0 \N -53919 1 1 1 640851 SO:0000738 46d861cab72441c63589339b36e644ac 15 1 JfN51lUFS8sH2f2-OQ58B1f-ZFmxtjEO 0 primary_assembly 0 \N -53920 10 10 1 1687656 SO:0000738 7f3f93983d66669bf5e18266f2565aa4 15 10 nv-h1XrxCbnvlyAlLn612PX8kJBmpf9m 0 primary_assembly 0 \N -53921 11 11 1 2038340 SO:0000738 3733d85e3f9fd8c5284e70dc977950b7 15 11 GEeKuO07bJBfzpd4KKpQPasLKNWpBia6 0 primary_assembly 0 \N -53922 12 12 1 2271494 SO:0000738 81d7ade8026e3099dc4a595a27ce5fe4 15 12 qoUuu7OZJT_jdUqwbk_gk-yvozwqFNdQ 0 primary_assembly 0 \N -53923 13 13 1 2925236 SO:0000738 2e5f27c4aa13202464e20c520a90bddc 15 13 HuuhxwpHdyAZqDG3ex62hIuMZ-FHd5Io 0 primary_assembly 0 \N -53924 14 14 1 3291936 SO:0000738 2bfeb85b8b4486aa4b03c3cb4dc56dad 15 14 mJezSE0gh0LS5XVFlfR6PZIRw3-eLyAR 0 primary_assembly 0 \N -53925 2 2 1 947102 SO:0000738 3264ffcaf0fb7e7c6adf14088c866886 15 2 tpNy-HkCz6Cq62euyOlWNduv91huwg7p 0 primary_assembly 0 \N -53926 3 3 1 1067971 SO:0000738 1a2a92569065a83856362480f86aaf17 15 3 YqknWYibUjSC4bFoJwSTAcXKBC4uld3x 0 primary_assembly 0 \N -53927 4 4 1 1200490 SO:0000738 3bb29def1493995037b9426acc5cdc20 15 4 jETGmEu2CRUEy_e3SK9zDfszR9-a9UmS 0 primary_assembly 0 \N -53928 5 5 1 1343557 SO:0000738 bf203f82beeabc7337d1dcb50bca14cf 15 5 6UHvnqsyEKDTHAzF6FZ0OHgDVVeQ6AZv 0 primary_assembly 0 \N -53933 I I 1 15072434 SO:0000738 185711aa389cf8d9302ad2ab07bd31e0 219 1 jXmB22vaK688X3rr2HPNzTNxnkQuCTgF 0 primary_assembly 0 \N -53934 II II 1 15279421 SO:0000738 9e7e67d1e51cdb31791deab89dc31550 219 2 7k5F0INpuaUarMm8nyI6k0nSw46G65xC 0 primary_assembly 0 \N -53935 III III 1 13783801 SO:0000738 c0f1a58d2bf6ff6a16617839bbc5fe52 219 3 -CzkjDYLb3a8V06zz7_pT3JxsxxGsQO0 0 primary_assembly 0 \N -53936 IV IV 1 17493829 SO:0000738 2156ab555e19afd8a0ca5aba82fb2a2a 219 4 0Tm-TKE2JHxrM9avtCG0-9cTEnqpfw2b 0 primary_assembly 0 \N -53937 MtDNA MtDNA 1 13794 SO:0000737 199e147d502d88e45047413dc83c039c 219 7 L1L6FO8ESIZJBNTOTPK7GnZvJYiewKK0 0 primary_assembly 0 \N -53938 V V 1 20924180 SO:0000738 
ffaf018f42f95375d2af6dcd402fef69 219 5 3ID7cGPgmNoJWDTn6-At5Geri2oVP6Rv 0 primary_assembly 0 \N -53939 X X 1 17718942 SO:0000738 dae3e9ec047e8147337b550dd8564d0b 219 6 mmZ9kKbTbAEyf09gNqHPQW4fZj8YiuTx 0 primary_assembly 0 \N -60019 Chromosome Chromosome 1 4641652 SO:0000738 482a2b04485ec8c4b5f4eaba2c2002da 1 7 NjjHtoQ2gYdy2RVkAZBKURBiV7xp-8ZS 0 primary_assembly 0 \N -60020 I I 1 230218 SO:0000738 6681ac2f62509cfc220d78751b8dc524 216 1 lZyxiD_ByprhOUzrR1o1bq0ezO_1gkrn 0 primary_assembly 0 \N -60021 II II 1 813184 SO:0000738 97a317c689cbdd7e92a5c159acd290d2 216 2 vw8jTiV5SAPDH4TEIZhNGylzNsQM4NC9 0 primary_assembly 0 \N -60022 III III 1 316620 SO:0000738 54f4a74aa6392d9e19b82c38aa8ab345 216 3 A_i2Id0FjBI-tQyU4ZaCEdxRzQheDevn 0 primary_assembly 0 \N -60023 IV IV 1 1531933 SO:0000738 74180788027e20df3de53dcb2367d9e3 216 4 QXSUMoZW_SSsCCN9_wc-xmubKQSOn3Qb 0 primary_assembly 0 \N -60024 IX IX 1 439888 SO:0000738 4eae53ae7b2029b7e1075461c3eb9aac 216 9 hb1scjdCWL89PtAkR0AVH9-dNH5R0FsN 0 primary_assembly 0 \N -60025 Mito Mito 1 85779 SO:0000737 71c39cf065b8d574f636b654c274cf1b 216 17 DrOlaWEY9iBBQrAAvbrzXsQlzZRV2J59 0 primary_assembly 0 \N -60026 V V 1 576874 SO:0000738 d2787193198c8d260f58f2097f9e1e39 216 5 UN_b-wij0EtsgFqQ2xNsbXs_GYQQIbeQ 0 primary_assembly 0 \N -60027 VI VI 1 270161 SO:0000738 b7ebc601f9a7df2e1ec5863deeae88a3 216 6 z-qJgWoacRBV77zcMgZN9E_utrdzmQsH 0 primary_assembly 0 \N -60028 VII VII 1 1090940 SO:0000738 a308c7ebf0b67c4926bc190dc4ba8ed8 216 7 9wkqGXgK6bvM0gcjBiTDk9tAaqOZojlR 0 primary_assembly 0 \N -60029 VIII VIII 1 562643 SO:0000738 f66a4f8eef89fc3c3a393fe0210169f1 216 8 K8ln7Ygob_lcVjNh-C8kUydzZjRt3UDf 0 primary_assembly 0 \N -3725167 JAGYYT010000001.1 JAGYYT010000001.1 0 46592869 SO:0000738 3ba11daa61cfe7d6244960d4bcc55113 5 \N lkW5j6Yeu6GL6xusZQCvq7KqO-3KGeh3 0 primary_assembly 0 \N -3725168 JAGYYT010000002.1 JAGYYT010000002.1 0 55482364 SO:0000738 bac357b106d364f8dabc169545765f92 5 \N iajXIfPoEJDR8BdUaRcI6LhzyZmgtXoA 0 primary_assembly 0 \N 
-3725169 JAGYYT010000003.1 JAGYYT010000003.1 0 24607739 SO:0000738 4ff9f520e63b14b64aede25a070baea9 5 \N CMg2W54uVjBnLWwm3xjUnBvyEXCvxyQh 0 primary_assembly 0 \N -3725170 JAGYYT010000004.1 JAGYYT010000004.1 0 10690193 SO:0000738 361cc6768f00f6bbb45ff12756c76cd1 5 \N I7OKvWCdpSHkxh_LZv3bdpb5sLcX_3IB 0 primary_assembly 0 \N -3725171 JAGYYT010000005.1 JAGYYT010000005.1 0 28045165 SO:0000738 725d218bfe0ce15239206f78f5604781 5 \N EdEqUHN3H05E-RlSmWi9SySm8JB90XzN 0 primary_assembly 0 \N -3725172 JAGYYT010000006.1 JAGYYT010000006.1 0 158663023 SO:0000738 514c27535b3840a2a2ee405f08cb3446 5 \N vZtOo2P9FiBega_X8LSl_0xj4dmJZmMs 0 primary_assembly 0 \N -3725173 JAGYYT010000007.1 JAGYYT010000007.1 0 46627313 SO:0000738 55301b7468e9cbc4d73a9253d752b652 5 \N HdIELGLdm6TPwPbrHfRlu_i2Nbs5w8Dp 0 primary_assembly 0 \N -3725174 JAGYYT010000008.1 JAGYYT010000008.1 0 1975142 SO:0000738 a54f58b59c6061d0c44273897a7c148e 5 \N VLql4yEZYIlHmzeHQLmlVrwou_7rBDxu 0 primary_assembly 0 \N -3725175 JAGYYT010000009.1 JAGYYT010000009.1 0 118296892 SO:0000738 ac1cbd267c1a1ce0eef59afd7fda6047 5 \N fTc-MuSRIDKYqLW4zFh7D9SR3UeAQ7tC 0 primary_assembly 0 \N -3725176 JAGYYT010000010.1 JAGYYT010000010.1 0 8989820 SO:0000738 bbab8e1c1a52042b195975018ff7271e 5 \N NRaeEfjaqeSr3XXxJss_Az8kfYEiJGUA 0 primary_assembly 0 \N -3727869 JAHAON010000001.1 JAHAON010000001.1 0 108267787 SO:0000738 2bfee5eba76ddf72a9ce1fe104dfb73a 6 \N nK5v5CSf3RTvSj3ynps2rwV9Qlwtq2XP 0 primary_assembly 0 \N -3727870 JAHAON010000002.1 JAHAON010000002.1 0 102298096 SO:0000738 136adb262594356fde2be5bf32d091ea 6 \N wn703GYvypvgDg3Nwmg-MI6xqflTex53 0 primary_assembly 0 \N -3727871 JAHAON010000003.1 JAHAON010000003.1 0 28141884 SO:0000738 d2044e19e173fc0af1271a71a20c19e7 6 \N eoWhZT0jMNBS9d50Sg02HkWe2TJA-Rga 0 primary_assembly 0 \N -3727872 JAHAON010000004.1 JAHAON010000004.1 0 40727531 SO:0000738 c7a23b01e734c6b22fdd078e97c6f1da 6 \N 2klgq3Y-GrPMLPHsGfkeE7lMuAjbnaxe 0 primary_assembly 0 \N -3727873 JAHAON010000005.1 JAHAON010000005.1 0 
111718856 SO:0000738 c3acd9fa5d4a02da10007a9b71d49f0d 6 \N 4h8XGzCJ6_JvovqGcLZ4HGz-WMOcnfwY 0 primary_assembly 0 \N -3727874 JAHAON010000006.1 JAHAON010000006.1 0 89895720 SO:0000738 c24fc746780a67eba106ec07563849f9 6 \N jSJ3PqRBMXsYUlSdMZUOagnljy_QRUFJ 0 primary_assembly 0 \N -3727875 JAHAON010000007.1 JAHAON010000007.1 0 39819246 SO:0000738 08a128df8dd3c67aa90fe81490cf9a80 6 \N Ao235-Ye0mxGwEwvzEo94ejs8Gk2l72e 0 primary_assembly 0 \N -3727876 JAHAON010000008.1 JAHAON010000008.1 0 32367248 SO:0000738 b7fbc0bd6188d59f429e25ee07ab0e8d 6 \N mSJYKw654SvQGfz9HKxNvDP1VWiSK3zp 0 primary_assembly 0 \N -3727877 JAHAON010000009.1 JAHAON010000009.1 0 56661561 SO:0000738 7ae11c0c030017c8c2d35d67c9ac9316 6 \N Crj1lvdkJ4Tlm0Q8CEgkGZ6vmxadaopS 0 primary_assembly 0 \N -3727878 JAHAON010000010.1 JAHAON010000010.1 0 139507333 SO:0000738 255d7b0f6a9f1f7d5f171b50eada5d6b 6 \N leJMsG-aQiRxi_QrgNomPJ3Wjpins2Ej 0 primary_assembly 0 \N -3742614 JAGYVY010000001.1 JAGYVY010000001.1 0 51866122 SO:0000738 c67e160f076badff0d3c09289f711944 4 \N lGYmQZArBanljWYhufm3YzWp46jnEE39 0 primary_assembly 0 \N -3742615 JAGYVY010000002.1 JAGYVY010000002.1 0 8986677 SO:0000738 c18c0a8433faef15c8947b862607f41e 4 \N 93cXvE8ygIE1LwyeejKgJ2jUBgi0fe8Y 0 primary_assembly 0 \N -3742616 JAGYVY010000003.1 JAGYVY010000003.1 0 47249189 SO:0000738 59d40cdafc3b0d91fe836a49ffe7f591 4 \N recqxURiYRbP6f9yq6ck8pWH3o6dPXuh 0 primary_assembly 0 \N -3742617 JAGYVY010000004.1 JAGYVY010000004.1 0 55363342 SO:0000738 3f9b3c2935d8657fadf86a9c8b6c44e3 4 \N wsdlsVY07wFILtVM4y1mQToK3WsC0x25 0 primary_assembly 0 \N -3742618 JAGYVY010000005.1 JAGYVY010000005.1 0 12137054 SO:0000738 56b31b794c9d1115a51ae703cae480b1 4 \N QidyPw5USOpHj12iV3zUJp2mOERzzcMQ 0 primary_assembly 0 \N -3742619 JAGYVY010000006.1 JAGYVY010000006.1 0 54505167 SO:0000738 7fb60047ffae535b15057cb346e543f2 4 \N gfqraPB64YaWSK9LuPzNybVqzY_kJzBv 0 primary_assembly 0 \N -3742620 JAGYVY010000007.1 JAGYVY010000007.1 0 24869350 SO:0000738 2990dbdb7dd770e092a6baf2a4d57f04 4 
\N EY9mIHACnljr9Akv3C9VuXbHTSDuY-rr 0 primary_assembly 0 \N -3742621 JAGYVY010000008.1 JAGYVY010000008.1 0 42967410 SO:0000738 7fc5b200bbaae4ce510058280e83a2e9 4 \N 8VytWiKv7yC_sKVPrpIcMMFN_hCYU9dU 0 primary_assembly 0 \N -3742622 JAGYVY010000009.1 JAGYVY010000009.1 0 18572896 SO:0000738 4087350722474ec6169ec1da9fca6e73 4 \N O2GMCPifUdXp2QpLqixsqWLWdJSLPRdv 0 primary_assembly 0 \N -3742623 JAGYVY010000010.1 JAGYVY010000010.1 0 6843817 SO:0000738 f54904e00811656ff76eed21370fdfcc 4 \N IyX4rd_pWEXWQ0J8jsVFwKdedzCi9_oM 0 primary_assembly 0 \N -3752279 JAHEPF010000001.1 JAHEPF010000001.1 0 34747916 SO:0000738 9cd36c56739382f5ccd8bf05d7b7a782 7 \N _g7GaDOEVsjK_hf11hZ4ky3pVZTIHINa 0 primary_assembly 0 \N -3752280 JAHEPF010000002.1 JAHEPF010000002.1 0 35554520 SO:0000738 a3e576310e6fc76eb80a394291fb3204 7 \N dc-s25qNY-HORolo9d0iwoecf2ozWQlE 0 primary_assembly 0 \N -3752281 JAHEPF010000003.1 JAHEPF010000003.1 0 33855561 SO:0000738 3d070bacf47cee60c022d565c170b6d4 7 \N YnmaKNuXOkW8WvsAMno_7XxJ3vFFCIMP 0 primary_assembly 0 \N -3752282 JAHEPF010000004.1 JAHEPF010000004.1 0 5384975 SO:0000738 90425435effabb9ed63a2bb2b360a4b2 7 \N teu3AdwsFzzqP2CCTmxwxal5AiQxJX5D 0 primary_assembly 0 \N -3752283 JAHEPF010000005.1 JAHEPF010000005.1 0 47328102 SO:0000738 4d3dcad460c7997151ab3caf27af487b 7 \N F9p0cz2HQjiWrzGFKJZbzAy5wknT13nd 0 primary_assembly 0 \N -3752284 JAHEPF010000006.1 JAHEPF010000006.1 0 26659419 SO:0000738 48d097797ada812bfb466838fbddb0d7 7 \N L3gpNYdi6RFEMs_Pzkr_ZGo-E7pfiDFu 0 primary_assembly 0 \N -3752285 JAHEPF010000007.1 JAHEPF010000007.1 0 20832236 SO:0000738 ca81ce1541e88ae3b27a4ab3a6190510 7 \N dxDQLWXbm7mipB0rFMhevOvyRuaTqnws 0 primary_assembly 0 \N -3752286 JAHEPF010000008.1 JAHEPF010000008.1 0 27516148 SO:0000738 aff3c5deddf48410710cee142d10ba7e 7 \N mjrFMbEfGqydPt4vCe_azkbYwGYYpFnA 0 primary_assembly 0 \N -3752287 JAHEPF010000009.1 JAHEPF010000009.1 0 12081732 SO:0000738 5b38d7b54682b639c168d7b661f9c876 7 \N qTEIGxHpPXRcqPNOhUBwVpSuWSVLCZyU 0 primary_assembly 0 
\N -3752288 JAHEPF010000010.1 JAHEPF010000010.1 0 4432623 SO:0000738 682b6eafb4b94dfad5d124873ac50812 7 \N nA8EZeMykBVcjHvhcIlhhiWM7ylPm-_g 0 primary_assembly 0 \N -3760113 JAHKSD010000001.1 JAHKSD010000001.1 0 110635364 SO:0000738 3dc28bf6013947644e3aa841763c7631 9 \N igWakb948tcC73JOgGzs-SDwWLKKuleI 0 primary_assembly 0 \N -3760114 JAHKSD010000002.1 JAHKSD010000002.1 0 1186550 SO:0000738 bc667c2ec5c2dc662a767e540fafa0c1 9 \N NFg11cJVWZmoQeeJR-oNyB5QT8Cg6_w- 0 primary_assembly 0 \N -3760115 JAHKSD010000003.1 JAHKSD010000003.1 0 32898 SO:0000738 1d4ad8c5a00a00dbb6ad0b968dbe365f 9 \N GNtztMSKoX5-PG1zYvEE0qyowc8akI3J 0 primary_assembly 0 \N -3760116 JAHKSD010000004.1 JAHKSD010000004.1 0 111658246 SO:0000738 88bb1aa0877ac906791c96551f542cef 9 \N Tu05HwWwxYR9xPqLU7QUnGrAOCKlMUmX 0 primary_assembly 0 \N -3760117 JAHKSD010000005.1 JAHKSD010000005.1 0 139957525 SO:0000738 de84bdeaebb942f9f0ebc57fbe60680d 9 \N jphEshZT4l8fr4HMvXAwu6EsqM3Ud8YQ 0 primary_assembly 0 \N -3760118 JAHKSD010000006.1 JAHKSD010000006.1 0 104451682 SO:0000738 d6a6387b078f4170e723032b48d7f8b6 9 \N SF8WSrHIwx3iITPRWUFqDHkZk6p35Rlu 0 primary_assembly 0 \N -3760119 JAHKSD010000007.1 JAHKSD010000007.1 0 93427 SO:0000738 3eb7b09435ea2d5e3421cdb77f24fcb1 9 \N OvwIE2BbB6aKm0uNNq5cXZ9lSQpoqVIX 0 primary_assembly 0 \N -3760120 JAHKSD010000008.1 JAHKSD010000008.1 0 50570566 SO:0000738 d4a40b2b51cd0291b7ec047ca614a953 9 \N zVEcf4soxkzJkLVNcib3nnGaPOxi4cBb 0 primary_assembly 0 \N -3760121 JAHKSD010000009.1 JAHKSD010000009.1 0 1212238 SO:0000738 6b31e1467a52b4747751e3d155bde949 9 \N 6SQJJlA7VRnm-L_Pf2F-a6TqUnO1IbsW 0 primary_assembly 0 \N -3760122 JAHKSD010000010.1 JAHKSD010000010.1 0 100646410 SO:0000738 9391399f48bde664b20f9b8dca808704 9 \N 07Ugr7jsN9jhBD3JbtYMNh79DDxOPjio 0 primary_assembly 0 \N -3785686 1A 1A 1 594102056 SO:0000738 1e85cfd7774c4118a84f1dd62783b31d 79 1 d1TidPwqmfZ775SEnWe1DyCPcKNpYpFO 0 primary_assembly 0 \N -3785687 1B 1B 1 689851870 SO:0000738 b917173c52104915e78845d137d922d0 79 2 
8WfzIibnnlG1L1iNPZ3Sk0uiwIMK4znm 0 primary_assembly 0 \N -3785688 1D 1D 1 495453186 SO:0000738 cef89d6e535210757cb10e504cbf9b03 79 3 y3u4DW3vBcXYTjtMBVhsyN7Ly7Rc2dFk 0 primary_assembly 0 \N -3785689 2A 2A 1 780798557 SO:0000738 080bb4a5ff38e4849bf446fbbe40000a 79 4 2PQ-iGfRjPsojv1K9g18dQfDzNO2lyXq 0 primary_assembly 0 \N -3785690 2B 2B 1 801256715 SO:0000738 8a52f592bb8a4f44438f7791dcca142a 79 5 keeRxrxBxos9oB3Adk47VryL12KtzINt 0 primary_assembly 0 \N -3785691 2D 2D 1 651852609 SO:0000738 3fc8c6b5ea64445d7fba64ac55719895 79 6 Gt6hPn3IJboGQ-mwMXzSITaPuYAkfYiD 0 primary_assembly 0 \N -3785692 3A 3A 1 750843639 SO:0000738 606b5e6749208700ccd9ec246449a1ac 79 7 Rm2Xzny0tMfgjPqmTa7EDn1BYJfcgk66 0 primary_assembly 0 \N -3785693 3B 3B 1 830829764 SO:0000738 7bae7b0ef4dabf3d7456de792263713c 79 8 Xrjc9MtZuG34jFBE4xY6VuhGKa6G41ya 0 primary_assembly 0 \N -3785694 3D 3D 1 615552423 SO:0000738 e7feee9ffc854a18889517e36b1fc257 79 9 s-CnQy24wXYDP0EsRUji7tvIkdfnF2qN 0 primary_assembly 0 \N -3785695 4A 4A 1 744588157 SO:0000738 0f0ac12903101a6d0c6b417066f4fc5d 79 10 Qz1gdFRd4l6QXrOlcreln873gbns69Q0 0 primary_assembly 0 \N diff --git a/src/tests/databases/ensembl_genome_metadata/attribute.txt b/src/tests/databases/ensembl_genome_metadata/attribute.txt deleted file mode 100644 index afd4abff..00000000 --- a/src/tests/databases/ensembl_genome_metadata/attribute.txt +++ /dev/null @@ -1,104 +0,0 @@ -1 assembly.accession assembly.accession assembly.accession string 1 -2 assembly.stats.chromosomes Chromosomes or plasmids Number of structures in cells containing DNA integer 0 -3 assembly.stats.component_sequences Component sequences Part of the primary sequences in assembly integer 0 -4 assembly.stats.contig_n50 Contig N50 Median size of contigs in a genome assembly bp 0 -5 assembly.date assembly.date assembly.date string 0 -6 assembly.default assembly.default assembly.default string 0 -7 assembly.stats.gc_percentage Average GC content Percentage of nucleotides in DNA that are G or C 
percent 0 -8 assembly.is_reference assembly.is_reference assembly.is_reference string 0 -9 assembly.level assembly.level assembly.level string 0 -10 assembly.mapping assembly.mapping assembly.mapping string 0 -11 assembly.name assembly.name assembly.name string 1 -12 assembly.provider_name assembly.provider_name assembly.provider_name string 0 -13 assembly.provider_url assembly.provider_url assembly.provider_url string 0 -14 assembly.stats.spanned_gaps Spanned gaps Number of gaps covered by sequencing reads integer 0 -15 assembly.tolid assembly.tolid assembly.tolid string 0 -16 assembly.stats.toplevel_sequences Top level sequences Primary sequences in a genome assembly integer 0 -17 assembly.stats.total_coding_sequence_length Total coding sequence length Total length of all coding sequences bp 0 -18 assembly.stats.total_gap_length Total gap length Total length of all gaps in a genome assembly bp 0 -19 assembly.stats.total_genome_length Total genome length Total length of all genomic sequences bp 0 -20 assembly.ucsc_alias assembly.ucsc_alias assembly.ucsc_alias string 0 -21 genebuild.stats.average_cds_length Average CDS length Average length of coding sequences float 0 -22 genebuild.stats.average_coding_exons_per_coding_gene Average coding exons per coding gene Average coding exons per coding gene string 0 -23 genebuild.stats.average_coding_exons_per_transcript Average coding exons per transcript Average coding exons per coding transcript float 0 -24 genebuild.stats.average_coding_exon_length Average exon length per coding gene Average length of coding exons bp 0 -25 genebuild.stats.average_exon_length Average exon length Average length of exons bp 0 -26 genebuild.stats.average_genomic_span Average coding genomic span Average length of all genomic regions bp 0 -27 genebuild.stats.average_intron_length Average intron length Average intron length per coding gene bp 0 -28 genebuild.stats.average_sequence_legth Average coding sequence length Average length of sequences 
in genome bp 0 -29 genebuild.stats.coding_genes Coding genes Genes that code for proteins integer 0 -30 genebuild.stats.coding_transcripts Coding transcripts Transcripts that code for proteins integer 0 -31 genebuild.stats.coding_transcripts_per_gene Average coding transcripts per gene Average coding transcripts per gene float 0 -32 genebuild.hash genebuild.hash genebuild.hash string 0 -33 genebuild.initial_release_date genebuild.initial_release_date genebuild.initial_release_date string 0 -34 genebuild.last_geneset_update genebuild.last_geneset_update genebuild.last_geneset_update string 1 -35 genebuild.level genebuild.level genebuild.level string 0 -36 genebuild.longest_gene_length Longest coding gene Length of longest gene bp 0 -37 genebuild.method genebuild.method genebuild.method string 0 -38 genebuild.method_display genebuild.method_display genebuild.method_display string 0 -39 genebuild.stats.nc_average_exons_per_transcript Average exons per non-coding transcript Mean exon count per transcript float 0 -40 genebuild.stats.nc_average_exon_length Average exon length per non-coding transcript Mean exon length bp 0 -41 genebuild.stats.nc_average_genomic_span Average non-coding genomic span Mean length of all genomic regions bp 0 -42 genebuild.stats.nc_average_sequence_length Average non-coding sequence length Mean length of all sequences bp 0 -43 genebuild.stats.nc_longest_gene_length Longest non-coding gene Length of longest non-coding gene bp 0 -44 genebuild.stats.nc_long_non_coding_genes Long non-coding genes Long genes not coding for proteins integer 0 -45 genebuild.stats.nc_misc_non_coding_genes Misc. 
non-coding genes Miscellaneous non-coding genes integer 0 -46 genebuild.stats.nc_non_coding_genes Non-coding genes Genes that don't code for proteins integer 0 -47 genebuild.stats.nc_shortest_gene_length Shortest non-coding gene Length of shortest gene bp 0 -48 genebuild.stats.nc_small_non_coding_genes Small non-coding genes Small genes not coding for proteins integer 0 -49 genebuild.stats.nc_total_introns Introns in non-coding genes Total intron count integer 0 -50 genebuild.stats.nc_total_transcripts Non-coding transcripts Total RNA transcript count integer 0 -51 genebuild.stats.nc_transcripts_per_gene Average transcripts per non-coding gene Mean transcripts count per gene float 0 -52 genebuild.stats.ps_average_exons_per_transcript Average intron length per pseudogene Mean exon count per pseudogene transcript float 0 -53 genebuild.stats.ps_average_exon_length Average exon length per pseudogene Mean pseudogene exon length bp 0 -54 genebuild.stats.ps_average_genomic_span Average pseudogene genomic span Mean length of pseudogene regions bp 0 -55 genebuild.stats.ps_average_intron_length Average intron length per pseudogene Mean pseudogene intron length bp 0 -56 genebuild.stats.ps_average_sequence_length Average pseudogene sequence length Mean length of pseudogene sequences bp 0 -57 genebuild.stats.ps_longest_gene_length Longest pseudogene Length of longest pseudogene bp 0 -58 genebuild.stats.ps_pseudogenes Pseudogenes Genes which don't code functional protiens integer 0 -59 genebuild.stats.ps_shortest_gene_length Shortest pseudogene Length of shortest pseudogene bp 0 -60 genebuild.stats.ps_total_exons Exons in pseudogenes Total exon count in pseudogenes integer 0 -61 genebuild.stats.ps_total_introns Introns in pseudogenes Total intron count in pseudogenes integer 0 -62 genebuild.stats.ps_total_transcripts Transcripts in pseudogenes Total pseudogene RNA transcript count integer 0 -63 genebuild.stats.ps_transcripts_per_gene Average transcripts per pseudogene Mean 
pseudogene transcripts count per pseudogene float 0 -64 genebuild.stats.shortest_gene_length Shortest coding gene Length of shortest gene bp 0 -65 genebuild.start_date genebuild.start_date genebuild.start_date string 1 -66 genebuild.stats.total_coding_exons Exons in coding genes Total number of coding exons integer 0 -67 genebuild.stats.total_exons Exons in genes Total number of exons integer 0 -68 genebuild.stats.total_introns Introns in coding genes Total number of introns integer 0 -69 genebuild.stats.total_transcripts Transcripts in coding genes Total number of RNA transcripts integer 0 -70 genebuild.stats.transcripts_per_gene Average transcripts per coding gene Average number of transcripts per gene float 0 -71 genebuild.version genebuild.version genebuild.version string 1 -72 genebuild.sample_gene genebuild.sample_gene Sample Gene Data string 1 -73 genebuild.sample_location genebuild.sample_location Sample Location Data string 1 -74 assembly.stats.coverage_depth assembly.coverage_depth assembly.coverage_depth string 0 -75 assembly.web_accession_source assembly.web_accession_source assembly.web_accession_source string 0 -76 assembly.web_accession_type assembly.web_accession_type assembly.web_accession_type string 0 -77 genebuild.id genebuild.id genebuild.id string 0 -78 genebuild.stats.nc_average_intron_length Average intron length per non-coding transcript Mean intron length bp 0 -79 genebuild.projection_source_db genebuild.projection_source_db genebuild.projection_source_db string 0 -80 assembly.long_name assembly.long_name assembly.long_name string 0 -81 assembly.url_name assembly.url_name assembly.url_name string 0 -82 genebuild.havana_datafreeze_date genebuild.havana_datafreeze_date genebuild.havana_datafreeze_date string 0 -83 assembly.version assembly.version assembly.version string 0 -84 genebuild.provider_name genebuild.provider_name genebuild.provider_name string 1 -85 genebuild.provider_url genebuild.provider_url genebuild.provider_url string 1 -119 
variation.short_variants Short variants Small-scale genetic variations integer 0 -120 variation.sample_variant variation.sample_variant variation.sample_variant string 0 -123 variation.short_variants_with_phenotype_assertions Short variation with phenotype assertion Short variation with phenotype assertion string 0 -161 compara.stats.homology_coverage compara.homology_coverage compara.homology_coverage float 0 -162 compara.homology_reference_species compara.homology_reference_species compara.homology_reference_species string 0 -163 regulation.stats.open_chromatin_count regulation.open_chromatin_count Number of open chromatin regions integer 0 -164 regulation.stats.promoter_count regulation.promoter_count Number of promoters integer 0 -165 regulation.stats.enhancer_count regulation.enhancer_count Number of enhancers integer 0 -166 regulation.stats.ctcf_count regulation.ctcf_count Number of CTCF binding sites integer 0 -167 regulation.stats.tfbs_count regulation.tfbs_count Number of regions enriched for transcription factor binding integer 0 -168 assembly.tol_id assembly.tol_id assembly.tol_id string 0 -169 genebuild.annotation_source genebuild.annotation_source genebuild.annotation_source string 1 -170 genebuild.stats.nc_total_exons Exons in non-coding genes Total exon count integer 0 -179 assembly.description assembly.description assembly.description string 0 -180 assembly.master_accession assembly.master_accession assembly.master_accession string 0 -181 assembly.alt_accession assembly.alt_accession assembly.alt_accession string 0 -182 dataset.build_start Dataset Build start date Dataset Build start date string 0 -183 dataset.build_end Dataset Build completed Dataset Build completed string 0 -197 genebuild.provider_version genebuild.provider_version genebuild.provider_version string 0 diff --git a/src/tests/databases/ensembl_genome_metadata/dataset.txt b/src/tests/databases/ensembl_genome_metadata/dataset.txt deleted file mode 100644 index cd62f5be..00000000 --- 
a/src/tests/databases/ensembl_genome_metadata/dataset.txt +++ /dev/null @@ -1,499 +0,0 @@ -1 02104faf-3fee-4f28-b53c-605843dac941 assembly \N 2023-09-22 15:01:44 GCA_000005845.2 1 1 Released \N -2 cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 genebuild EXT01 2023-09-22 15:01:44 GCA_000005845.2_EXT01 1 2 Released \N -7 dc1b508e-f148-4a6f-b051-1a0c53142cf5 assembly \N 2023-09-22 15:02:00 GCA_018473315.1 4 1 Released \N -9 45aec801-4fe7-4ac2-9afa-19aea2a8409e assembly \N 2023-09-22 15:02:01 GCA_018469415.1 5 1 Released \N -11 5cda83f4-521c-4713-b2fe-b3ede2f1a51e assembly \N 2023-09-22 15:02:02 GCA_018469875.1 6 1 Processed \N -13 3f9bf8d6-1514-4657-9f73-38a7354a80b8 assembly \N 2023-09-22 15:02:04 GCA_018505825.1 7 1 Processed \N -14 53936715-1371-4343-95af-f39d06943db7 genebuild ENS01 2023-09-22 15:02:04 GCA_018505825.1_ENS01 7 2 Processed \N -17 d641779c-2add-46ce-acf4-a2b6f15274b1 assembly \N 2023-09-22 15:02:11 GCA_018852615.1 9 1 Processed \N -23 06b4892b-8e34-49bc-be84-8126e5a7cf93 assembly \N 2023-09-22 15:03:01 GCA_000002765.2 14 1 Released \N -24 f202cd36-d0dc-40df-9dd6-a8218e0d1366 genebuild EXT01 2023-09-22 15:03:01 GCA_000002765.2_EXT01 14 2 Released \N -37 6f8bd121-0345-4b77-9dc1-d567ac13447d assembly \N 2023-09-22 15:03:02 GCA_021950905.1 18 1 Processed \N -38 2ef7c056-847e-4742-a68b-18c3ece068aa genebuild ENS01 2023-09-22 15:03:02 GCA_021950905.1_ENS01 18 2 Submitted \N -61 3474e0d6-d031-40bc-a4ae-230236886568 assembly \N 2023-09-22 15:03:22 GCA_000001405.14 40 1 Released \N -62 e456d1c2-eea2-40f1-83ee-31912905b695 genebuild GENCODE19 2023-09-22 15:03:22 GCA_000001405.14_GENCODE19 40 2 Released \N -147 999315f6-6d25-481f-a017-297f7e1490c8 assembly \N 2023-09-22 15:04:29 GCA_900519105.1 79 1 Released \N -148 287a5483-55a4-46e6-a58b-a84ba0ddacd6 genebuild EXT01 2023-09-22 15:04:29 GCA_900519105.1_EXT01 79 2 Released \N -171 c813f7b7-645c-45ac-8536-08190fd7daa0 assembly \N 2023-09-22 15:04:45 GCA_000001405.29 92 1 Released \N -172 
949defef-c4d2-4ab1-8a73-f41d2b3c7719 genebuild GENCODE44 2023-09-22 15:04:45 GCA_000001405.29_GENCODE44 92 2 Released \N -177 3c67123a-e9e1-41ef-9014-2aadc8acf12a assembly \N 2023-09-22 15:04:50 GCA_018505865.1 97 1 Processed \N -178 ed8ca387-38e3-4bfe-8b85-757a59b95126 genebuild ENS01 2023-09-22 15:04:50 GCA_018505865.1_ENS01 97 2 Processed \N -183 8801edaf-86ec-4799-8fd4-a59077f04c05 assembly \N 2023-09-22 15:04:53 GCA_018852605.1 100 1 Processed \N -184 11a0be7f-99ae-45d3-a004-dc19bb562330 genebuild ENS01 2023-09-22 15:04:53 GCA_018852605.1_ENS01 100 2 Processed \N -197 fd27883a-e5d3-4502-b774-65d3cc4f4e18 assembly \N 2023-09-22 15:04:56 GCA_018469925.1 107 1 Processed \N -249 786344d1-a71f-4bab-aa37-6ee315ed60a4 assembly \N 2023-09-22 15:05:37 GCA_018469425.1 135 1 Processed \N -250 2bc8874e-6672-4293-89d6-0b837005177c genebuild ENS01 2023-09-22 15:05:37 GCA_018469425.1_ENS01 135 2 Processed \N -337 eb451e00-7abb-4462-82bf-f29f6ed3dc1b assembly \N 2023-09-22 15:06:39 GCA_021951015.1 179 1 Processed \N -338 bd63a676-45ff-494a-b26f-2b779cb6c180 genebuild ENS01 2023-09-22 15:06:39 GCA_021951015.1_ENS01 179 2 Processed \N -347 6790a2a6-b178-4ab2-a12b-aad3d5511713 assembly \N 2023-09-22 15:06:43 GCA_018473295.1 185 1 Released \N -348 23d52e01-2e3d-495f-b345-df41c605caa9 genebuild ENS01 2023-09-22 15:06:43 GCA_018473295.1_ENS01 185 2 Released \N -401 3b58ee8a-8f8d-4dfe-bb58-44c2ed57f229 assembly \N 2023-09-22 15:06:55 GCA_000146045.2 214 1 Released \N -402 cfef61f8-7e24-4ed6-945f-baca1b2664a3 genebuild EXT01 2023-09-22 15:06:55 GCA_000146045.2_EXT01 214 2 Released \N -405 6c1896f9-10dd-423e-a1ff-db8b5815cb66 assembly \N 2023-09-22 15:06:58 GCA_000002985.3 217 1 Released \N -406 ea69f164-cc77-4671-bf97-c7f537dc400e genebuild EXT01 2023-09-22 15:06:58 GCA_000002985.3_EXT01 217 2 Released \N -888 9d717ead-ffe0-4fc1-b58c-3c057b754021 genebuild ENS01 2023-11-07 11:18:55 GCA_018473315.1_ENS01 4 2 Released \N -890 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 genebuild ENS01 
2023-11-07 11:18:57 GCA_018469415.1_ENS01 5 2 Released \N -892 664088c7-356e-418c-adb2-15945b7ebc4b genebuild ENS01 2023-11-07 11:19:00 GCA_018469875.1_ENS01 6 2 Processed \N -896 f9690d7e-26c1-459d-8102-0c4a1a468806 genebuild ENS01 2023-11-07 11:19:16 GCA_018852615.1_ENS01 9 2 Processed \N -1006 66db32ae-974f-480c-a60b-63cc49d00f68 genebuild ENS01 2023-11-07 11:22:53 GCA_018469925.1_ENS01 107 2 Processed \N -1391 bf1f5064-8520-4f19-84e4-449aa6c1c1e2 variation 1.0 2023-11-09 12:49:25.273751 GRCh38 673 3 Released \N -1392 5b869bbb-098f-4827-afc0-532a2bc88903 variation 1.0 2023-11-09 12:49:25.797822 GRCh37 674 3 Released \N -1393 5717d47e-dad9-4c2d-b015-c055bc93e831 evidence 1.0 2023-11-09 12:49:26.267728 GRCh38 673 4 Released \N -1394 e0202b0e-578d-493f-aeb4-464f5d2e233a evidence 1.0 2023-11-09 12:49:26.756909 GRCh37 674 4 Released \N -1405 bfa00db1-7925-4caa-8c7c-fa48bc5701a5 evidence 1.0 2023-11-09 12:49:31.245732 R64-1-1 644 4 Released \N -1414 c9d18d30-f7ad-44f4-becf-9b2c3606ac4f evidence 1.0 2023-11-09 12:49:34.817886 ASM276v2 653 4 Released \N -1421 80babe97-b289-407d-af70-b46ff5478f2e evidence 1.0 2023-11-09 12:49:37.663593 IWGSC 660 4 Released \N -1464 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 variation 1.0 2023-11-09 12:49:58.762831 HG03516.pri.mat.f1_v2 565 3 Processed \N -1470 69f38cd5-5774-4b29-9ef4-181441ca0eb8 variation 1.0 2023-11-09 12:50:01.822030 HG02622.alt.pat.f1_v2 571 3 Processed \N -1475 08543d8d-2110-46f3-a9b6-ac58c4af8202 variation 1.0 2023-11-09 12:50:04.431570 HG03540.alt.pat.f1_v2 576 3 Released \N -1491 a4c858c4-2e55-43c9-af8b-7adc7ce24c58 variation 1.0 2023-11-09 12:50:12.565449 HG02622.pri.mat.f1_v2 592 3 Processed \N -1494 79e1a7b8-85dc-4bfe-b606-6992ca109ede variation 1.0 2023-11-09 12:50:14.106939 HG02109.pri.mat.f1_v2 595 3 Processed \N -1496 0c1cdfea-cb10-4a08-8f75-4158658d6a02 variation 1.0 2023-11-09 12:50:15.157829 HG02109.alt.pat.f1_v2 597 3 Processed \N -1507 dd28ea84-6890-4af8-bd2b-2caa9917f221 variation 1.0 2023-11-09 
12:50:20.833289 HG03516.alt.pat.f1_v2 608 3 Released \N -1509 b876cb36-6e84-4a2c-8af2-14e096d48df9 variation 1.0 2023-11-09 12:50:21.809661 HG03540.pri.mat.f1_v2 610 3 Released \N -1528 0a0bed83-72c7-4f8a-a1cb-97450ef82495 variation 1.0 2023-11-09 12:50:31.531084 R64-1-1 644 3 Released \N -1537 ff7cb333-fc39-4f00-93e0-65a0d5eb596b variation 1.0 2023-11-09 12:50:36.212197 ASM276v2 653 3 Released \N -1544 e659bef9-22f7-4ad2-8215-4a48ecd228df variation 1.0 2023-11-09 12:50:39.743563 IWGSC 660 3 Released \N -2276 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 compara_homologies 1.0 2023-11-14 16:49:59.780613 Compara homologies 251 6 Released \N -2291 4b02b11e-397c-4a4f-8c13-8b65efb87030 compara_homologies 1.0 2023-11-14 16:50:13.529466 Compara homologies 266 6 Released \N -2319 35f56606-9186-432f-b033-6e7204708f3b compara_homologies 1.0 2023-11-14 16:50:38.690925 Compara homologies 294 6 Processed \N -2348 aebf0b81-4234-4aa9-85cc-abfe91f5eac2 compara_homologies 1.0 2023-11-14 16:51:03.416326 Compara homologies 323 6 Released \N -2357 caac6097-4921-4c10-bfc0-1c3e9b2604dc compara_homologies 1.0 2023-11-14 16:51:11.265120 Compara homologies 332 6 Released \N -2384 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb compara_homologies 1.0 2023-11-14 16:51:33.936250 Compara homologies 359 6 Released \N -2394 b67e1761-3341-4965-9a5b-041cb8230cb3 compara_homologies 1.0 2023-11-14 16:51:42.824153 Compara homologies 369 6 Released \N -2408 0571d77c-5cc6-4819-80bf-34a42acfc3f6 compara_homologies 1.0 2023-11-14 16:51:54.939579 Compara homologies 383 6 Released \N -2449 e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 compara_homologies 1.0 2023-11-14 16:52:30.671955 Compara homologies 424 6 Released \N -2454 f2734f34-36a0-4594-871d-f7f6d317d05a compara_homologies 1.0 2023-11-14 16:52:35.328579 Compara homologies 429 6 Submitted \N -2458 58df568e-48c1-4a3b-838b-448540392f9c compara_homologies 1.0 2023-11-14 16:52:38.647877 Compara homologies 433 6 Released \N -2482 832c1885-6b98-4bf0-b4cf-70e317399bd4 
compara_homologies 1.0 2023-11-14 16:52:59.648742 Compara homologies 457 6 Released \N -2494 f32b7f9a-97fd-41cd-86be-a5fb5becd335 compara_homologies 1.0 2023-11-14 16:53:10.265485 Compara homologies 469 6 Processed \N -2518 679d6452-799c-4a2f-8906-0db6c639e498 regulatory_features 1.0 2023-11-15 15:07:12.410801 Regulatory Annotation 670 7 Released \N -6593 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 compara_homologies 1.0 2024-02-06 21:29:06.353565 Compara homologies 457 6 Processed \N -6623 60fae37a-afe2-4bb1-9da0-321911dd2856 compara_homologies 1.0 2024-02-06 21:29:21.311224 Compara homologies 266 6 Processed \N -6699 06debe4a-2b3e-4fab-991e-26863dbe8af8 compara_homologies 1.0 2024-02-06 21:29:59.006535 Compara homologies 323 6 Processed \N -6849 f93d21ca-9a24-4c31-ae11-b0f8d3deab6d compara_homologies 1.0 2024-02-06 21:31:13.258218 Compara homologies 423 6 Submitted \N -6896 f3abf167-6a8f-45cc-b753-22a955123758 compara_homologies 1.0 2024-02-06 21:31:36.368299 Compara homologies 387 6 Processed \N -7069 9681f4c2-afb4-4a08-8e4d-f26363f65ddf compara_homologies 1.0 2024-02-06 21:33:02.240258 Compara homologies 369 6 Processed \N -7177 d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 compara_homologies 1.0 2024-02-06 21:33:55.672317 Compara homologies 424 6 Processed \N -7320 c4f0eec5-f4b3-4cef-8369-7b13932509e1 compara_homologies 1.0 2024-02-06 21:35:06.396207 Compara homologies 284 6 Processed \N -7535 d51ab85a-f037-47a3-ba53-423ad8e42669 compara_homologies 1.0 2024-02-06 21:36:52.499221 Compara homologies 383 6 Processed \N -7603 254a68c7-f512-446d-a958-983a2713daf2 compara_homologies 1.0 2024-02-06 21:37:25.921259 Compara homologies 359 6 Processed \N -7785 9f2a7c92-e04a-443f-a991-1481a9466456 compara_homologies 1.0 2024-02-06 21:38:56.300529 Compara homologies 313 6 Processed \N -7820 681ceca3-3336-4b92-ac11-43b5fcabec62 compara_homologies 1.0 2024-02-06 21:39:13.565274 Compara homologies 251 6 Processed \N -7847 d78259af-f491-42de-9cbf-de744b09efee compara_homologies 1.0 
2024-02-06 21:39:26.897641 Compara homologies 332 6 Processed \N -8130 5b618784-a5ff-46cc-8102-b082ffb6e447 compara_homologies 1.0 2024-02-06 21:41:47.150011 Compara homologies 368 6 Submitted \N -8392 b6472939-9e49-4d46-b93e-304910acabf3 compara_homologies 1.0 2024-02-06 21:44:00.982498 Compara homologies 4352 6 Processed \N -8661 a5bf42be-63c1-4616-9af1-bc03aea92643 compara_homologies 1.0 2024-02-06 21:46:14.099319 Compara homologies 443 6 Submitted \N -8662 af8eee44-ca56-4baf-a5f1-ad60d1165f3a genebuild_compute ENS01 2024-04-24 10:18:12 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 8 Released 348 -8663 a0fef323-23b9-4d4c-87b3-42f290dffbc7 xrefs ENS01 2024-04-24 10:18:12 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 13 Released 8662 -8664 5d12b439-f994-408b-a7cc-88a0ce2a1c5e protein_features ENS01 2024-04-24 10:18:12 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 14 Released 8662 -8665 fd7c81b9-bd96-4074-a78f-ce86059d3a55 alpha_fold ENS01 2024-04-24 10:18:12 From af8eee44-ca56-4baf-a5f1-ad60d1165f3a 185 15 Released 8662 -8666 e4630a33-1d85-4a93-9c3d-ba23f531e900 genebuild_files ENS01 2024-04-24 10:18:12 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 9 Released 348 -8667 5dc9ebba-a6f0-4380-9f9b-5735855c8c0b blast ENS01 2024-04-24 10:18:12 From e4630a33-1d85-4a93-9c3d-ba23f531e900 185 16 Released 8666 -8668 12304159-93ac-4597-bbfb-fc487a580bd2 ftp_dumps ENS01 2024-04-24 10:18:12 From e4630a33-1d85-4a93-9c3d-ba23f531e900 185 17 Released 8666 -8669 aaf2c600-821f-4ade-a3e7-fde21c333060 genebuild_web ENS01 2024-04-24 10:18:12 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 11 Released 348 -8670 97e9f169-4ba2-4d44-b958-7fc3233c2c24 thoas_dumps ENS01 2024-04-24 10:18:12 From aaf2c600-821f-4ade-a3e7-fde21c333060 185 18 Released 8669 -8671 45c72ec3-7b5e-4b5e-83f8-1fc5790b1ad4 browser_files ENS01 2024-04-24 10:18:12 From aaf2c600-821f-4ade-a3e7-fde21c333060 185 20 Released 8669 -8672 7a33c596-7883-4638-86d4-9aa4ac266110 checksums ENS01 2024-04-24 10:18:12 From 
aaf2c600-821f-4ade-a3e7-fde21c333060 185 23 Released 8669 -8673 d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c genebuild_web ENS01 2024-04-24 10:18:12 From 23d52e01-2e3d-495f-b345-df41c605caa9 185 12 Released 348 -8674 1ff09568-0971-4907-b023-2e81b9d73e61 thoas_load ENS01 2024-04-24 10:18:12 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 19 Released 8673 -8675 ff3d51eb-154b-4665-887c-c406cc3bc78b genebuild_track ENS01 2024-04-24 10:18:12 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 21 Released 8673 -8676 76bc09f9-ab83-49ad-9b14-b81dd2ee5eb0 refget_load ENS01 2024-04-24 10:18:12 From d6f3a493-9abf-4cef-81b7-dbf7bd0a8c4c 185 24 Released 8673 -8677 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 genebuild_compute ENS01 2024-04-24 10:18:12 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 8 Released 888 -8678 8a49f103-b405-4f54-8714-980007cfe776 xrefs ENS01 2024-04-24 10:18:12 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 13 Released 8677 -8679 129e0e2b-b778-47d5-9252-822af8adbf5b protein_features ENS01 2024-04-24 10:18:12 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 14 Released 8677 -8680 d811d1ff-8e73-4215-b622-4da5d1ae68bc alpha_fold ENS01 2024-04-24 10:18:12 From 5fb17152-b5bd-4a8f-8b00-1e87b1bdd036 4 15 Released 8677 -8681 b57a6524-00c5-423a-b569-57e2039d5f75 genebuild_files ENS01 2024-04-24 10:18:12 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 9 Released 888 -8682 90d32255-8476-4d33-8aca-cd05b65f6b6f blast ENS01 2024-04-24 10:18:12 From b57a6524-00c5-423a-b569-57e2039d5f75 4 16 Released 8681 -8683 3243f1ac-38aa-412b-9a2a-c5edf0336a2f ftp_dumps ENS01 2024-04-24 10:18:12 From b57a6524-00c5-423a-b569-57e2039d5f75 4 17 Released 8681 -8684 c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 genebuild_web ENS01 2024-04-24 10:18:12 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 11 Released 888 -8685 98cb07a1-2a1d-496a-a0a7-168662eda07b thoas_dumps ENS01 2024-04-24 10:18:12 From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 18 Released 8684 -8686 3b9fee1b-0c94-4345-9599-919ad721a7da browser_files ENS01 2024-04-24 10:18:12 
From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 20 Released 8684 -8687 8978bd71-c1b1-40b5-8628-1dd84115badd checksums ENS01 2024-04-24 10:18:12 From c68f4857-0f85-48eb-a9d1-37e7e9cb5d71 4 23 Released 8684 -8688 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 genebuild_web ENS01 2024-04-24 10:18:12 From 9d717ead-ffe0-4fc1-b58c-3c057b754021 4 12 Released 888 -8689 53979850-c127-4a85-a680-9183978bb250 thoas_load ENS01 2024-04-24 10:18:12 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 19 Released 8688 -8690 2b8d9066-8a02-4d47-ab29-c39f43ccfc53 genebuild_track ENS01 2024-04-24 10:18:12 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 21 Released 8688 -8691 4d418be3-ea1b-4f36-afa4-c40d113b3910 refget_load ENS01 2024-04-24 10:18:12 From 17e767e8-4f6a-40a2-aaa1-ee4f03e37567 4 24 Released 8688 -8692 3cfe16ce-8a7e-49c8-b719-2affce984771 genebuild_compute ENS01 2024-04-24 10:18:13 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 8 Released 890 -8693 f5eeacaa-8ee4-4739-8aed-a6aeaaadd13e xrefs ENS01 2024-04-24 10:18:13 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 13 Released 8692 -8694 808c43bc-7950-4e32-935b-ef13f1a869c1 protein_features ENS01 2024-04-24 10:18:13 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 14 Released 8692 -8695 7413ae88-d840-47ca-a602-713e03e6b123 alpha_fold ENS01 2024-04-24 10:18:13 From 3cfe16ce-8a7e-49c8-b719-2affce984771 5 15 Released 8692 -8696 6a5f98e4-2f2f-4c88-9172-d02dc623c42f genebuild_files ENS01 2024-04-24 10:18:13 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 9 Released 890 -8697 cd9f881d-1716-425d-9879-472193cbf337 blast ENS01 2024-04-24 10:18:13 From 6a5f98e4-2f2f-4c88-9172-d02dc623c42f 5 16 Released 8696 -8698 84d2b150-1e5b-49b2-a35c-14596d503ae0 ftp_dumps ENS01 2024-04-24 10:18:13 From 6a5f98e4-2f2f-4c88-9172-d02dc623c42f 5 17 Released 8696 -8699 5b63c887-d867-411c-b138-536ed0c430de genebuild_web ENS01 2024-04-24 10:18:13 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 11 Released 890 -8700 3a7c3603-acfa-4803-8c0c-c413501c6180 thoas_dumps ENS01 2024-04-24 10:18:13 From 
5b63c887-d867-411c-b138-536ed0c430de 5 18 Released 8699 -8701 6a52d021-242b-4e7a-84aa-b2f08d6b1e89 browser_files ENS01 2024-04-24 10:18:13 From 5b63c887-d867-411c-b138-536ed0c430de 5 20 Released 8699 -8702 30f5310e-7e78-4027-aa32-82de71946e20 checksums ENS01 2024-04-24 10:18:13 From 5b63c887-d867-411c-b138-536ed0c430de 5 23 Released 8699 -8703 f67689b2-8c52-4c3e-89da-70520e0613d8 genebuild_web ENS01 2024-04-24 10:18:13 From 1c759aca-63a1-4eea-abe8-ef5f298fe6e2 5 12 Released 890 -8704 e4e75159-a56c-4a38-ac81-b74f7e89c022 thoas_load ENS01 2024-04-24 10:18:13 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 19 Released 8703 -8705 981eb155-b2c5-4571-955f-f2d7574ef5eb genebuild_track ENS01 2024-04-24 10:18:13 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 21 Released 8703 -8706 078f49f0-cc6e-4e4a-bfee-f8fc240a635a refget_load ENS01 2024-04-24 10:18:13 From f67689b2-8c52-4c3e-89da-70520e0613d8 5 24 Released 8703 -8707 705c3da3-186f-42f7-bd2d-795285e9b246 evidence 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 4 Released 1475 -8708 bb98fc3a-30af-41b2-9dac-29d580b42b68 short_variant 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 5 Released 1475 -8709 4b9b9585-8570-4f26-8819-a45f92df8d23 variation_ftp 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 29 Released 1475 -8710 7ca071a6-2ab3-4535-8c52-a21cb012fe0c browser_files 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 31 Released 1475 -8711 9d5325d8-49ba-4606-aaa2-c7269a19f5f7 variation_track 1.0 2024-04-24 10:18:13 From 08543d8d-2110-46f3-a9b6-ac58c4af8202 576 32 Released 1475 -8712 d6d6f12a-c806-4db0-99dd-a667fbd7c191 evidence 1.0 2024-04-24 10:18:13 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 4 Released 1507 -8713 8ed8f4b5-423e-44f1-a01d-bf2c19857374 short_variant 1.0 2024-04-24 10:18:13 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 5 Released 1507 -8714 e17b4956-cf44-4ce3-bad6-c141fae5148c variation_ftp 1.0 2024-04-24 10:18:13 From 
dd28ea84-6890-4af8-bd2b-2caa9917f221 608 29 Released 1507 -8715 48876d82-b51c-41d5-818f-5af04bcf8fc3 browser_files 1.0 2024-04-24 10:18:13 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 31 Released 1507 -8716 c2dab1e0-5d72-4d50-9ec7-c8e90746ec65 variation_track 1.0 2024-04-24 10:18:13 From dd28ea84-6890-4af8-bd2b-2caa9917f221 608 32 Released 1507 -8717 6d799ad1-5fe6-477f-8ddb-6a16ab3ea33a evidence 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 4 Released 1509 -8718 c06b0534-4708-4cec-913b-8b354fda0c6b short_variant 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 5 Released 1509 -8719 fff94973-5318-4821-9afa-3fd2fc0b7a4e variation_ftp 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 29 Released 1509 -8720 b5a2b993-1252-4495-bdf3-9eae0322cf39 browser_files 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 31 Released 1509 -8721 db33d965-62b4-4d83-9738-ade69df4cff5 variation_track 1.0 2024-04-24 10:18:13 From b876cb36-6e84-4a2c-8af2-14e096d48df9 610 32 Released 1509 -8722 7780e084-b3ca-4df4-ba40-5a8bfea6e9a1 homology_compute 1.0 2024-04-24 10:18:13 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 25 Released 2276 -8723 71fe75e0-50ca-4b74-a8a6-3d8d016e4227 homology_load 1.0 2024-04-24 10:18:13 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 26 Released 2276 -8724 10879879-9a3a-4dfd-b0eb-c06699f0aada homology_ftp 1.0 2024-04-24 10:18:13 From 0bdc7428-6f81-4c96-a8d3-f5d5bff5a9b8 251 27 Released 2276 -8725 2925c0ee-f987-4102-b792-4904c7b98d19 homology_compute 1.0 2024-04-24 10:18:13 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 383 25 Released 2408 -8726 6f0b5633-abef-4daf-be84-489a979f8b0b homology_load 1.0 2024-04-24 10:18:13 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 383 26 Released 2408 -8727 3e44f562-0cae-4165-9ef5-75fd6593d2e1 homology_ftp 1.0 2024-04-24 10:18:13 From 0571d77c-5cc6-4819-80bf-34a42acfc3f6 383 27 Released 2408 -8728 80a73415-7eda-4c22-80a1-93508c1ebc03 homology_compute 1.0 
2024-04-24 10:18:13 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 25 Released 2482 -8729 11185ed4-ea77-406e-bbbd-829601db2463 homology_load 1.0 2024-04-24 10:18:13 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 26 Released 2482 -8730 745f6eba-367b-4db9-92f7-7353603ce4ce homology_ftp 1.0 2024-04-24 10:18:13 From 832c1885-6b98-4bf0-b4cf-70e317399bd4 457 27 Released 2482 -8731 1dcbaf3e-5179-434c-beae-1416149f30cf genebuild_compute ENS01 2024-04-24 10:18:13 From 53936715-1371-4343-95af-f39d06943db7 7 8 Processed 14 -8732 d340ac5b-2f9b-44d7-bab8-99ff17516053 xrefs ENS01 2024-04-24 10:18:13 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 13 Processed 8731 -8733 5a2fd22b-31ac-4e69-ad20-137b6d297cf8 protein_features ENS01 2024-04-24 10:18:13 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 14 Processed 8731 -8734 7f988fe4-f4d5-4bd7-8516-2cfc767d7ec6 alpha_fold ENS01 2024-04-24 10:18:13 From 1dcbaf3e-5179-434c-beae-1416149f30cf 7 15 Processed 8731 -8735 81bdc51c-4c4f-4e07-850b-562d0d964269 genebuild_files ENS01 2024-04-24 10:18:13 From 53936715-1371-4343-95af-f39d06943db7 7 9 Processed 14 -8736 871842a4-566d-4b44-b883-caea737dbe70 blast ENS01 2024-04-24 10:18:13 From 81bdc51c-4c4f-4e07-850b-562d0d964269 7 16 Processed 8735 -8737 5e7deba4-4293-4d6a-b954-73dfdc3be208 ftp_dumps ENS01 2024-04-24 10:18:13 From 81bdc51c-4c4f-4e07-850b-562d0d964269 7 17 Processed 8735 -8738 2563efa7-e2be-401a-a89f-79ea71c17452 genebuild_web ENS01 2024-04-24 10:18:13 From 53936715-1371-4343-95af-f39d06943db7 7 11 Processed 14 -8739 430221cd-df0d-4727-bd3f-8bdd1e69fb62 thoas_dumps ENS01 2024-04-24 10:18:13 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 18 Processed 8738 -8740 c918aa39-6dd4-4032-87da-5282e90c4142 browser_files ENS01 2024-04-24 10:18:13 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 20 Processed 8738 -8741 ce889e08-ab8b-4420-891b-9fb3ab5f4e81 checksums ENS01 2024-04-24 10:18:13 From 2563efa7-e2be-401a-a89f-79ea71c17452 7 23 Processed 8738 -8742 96331294-fb65-457b-a3bd-5e41f1818044 genebuild_web ENS01 
2024-04-24 10:18:13 From 53936715-1371-4343-95af-f39d06943db7 7 12 Processed 14 -8743 a31eb1a1-1ebe-478e-b6a5-fd04ffbb0e3c thoas_load ENS01 2024-04-24 10:18:13 From 96331294-fb65-457b-a3bd-5e41f1818044 7 19 Processed 8742 -8744 a1fae0e7-c124-4849-bfe5-e68c583b7826 genebuild_track ENS01 2024-04-24 10:18:13 From 96331294-fb65-457b-a3bd-5e41f1818044 7 21 Processed 8742 -8745 145ad879-9c5e-4833-9645-ec0e9fb35079 refget_load ENS01 2024-04-24 10:18:13 From 96331294-fb65-457b-a3bd-5e41f1818044 7 24 Processed 8742 -8746 8d55a4f8-0550-4770-aac2-c7963bfa1176 genebuild_compute ENS01 2024-04-24 10:18:13 From 2bc8874e-6672-4293-89d6-0b837005177c 135 8 Processed 250 -8747 d92da251-954c-417b-8e89-03c677a60553 xrefs ENS01 2024-04-24 10:18:13 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 13 Processed 8746 -8748 819805e5-7aeb-437c-bb55-3918a7c94e48 protein_features ENS01 2024-04-24 10:18:13 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 14 Processed 8746 -8749 9c57cc1b-ee2b-4adf-968b-8b6cec556f95 alpha_fold ENS01 2024-04-24 10:18:13 From 8d55a4f8-0550-4770-aac2-c7963bfa1176 135 15 Processed 8746 -8750 d4c1e848-8a50-4aae-95fe-5efb85833613 genebuild_files ENS01 2024-04-24 10:18:13 From 2bc8874e-6672-4293-89d6-0b837005177c 135 9 Processed 250 -8751 24fbb4e6-45a2-4bb9-a1b7-2b14fafc5135 blast ENS01 2024-04-24 10:18:13 From d4c1e848-8a50-4aae-95fe-5efb85833613 135 16 Processed 8750 -8752 32b8b190-ccb8-4d4e-8080-686fdb2bf853 ftp_dumps ENS01 2024-04-24 10:18:13 From d4c1e848-8a50-4aae-95fe-5efb85833613 135 17 Processed 8750 -8753 ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 genebuild_web ENS01 2024-04-24 10:18:13 From 2bc8874e-6672-4293-89d6-0b837005177c 135 11 Processed 250 -8754 b41d8022-6a58-4a2b-a0fb-6776a722b7f0 thoas_dumps ENS01 2024-04-24 10:18:13 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 18 Processed 8753 -8755 9d4373b8-4248-4afe-ab14-d6c14c1b19ea browser_files ENS01 2024-04-24 10:18:13 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 20 Processed 8753 -8756 
cc668846-b60c-4544-b151-84e77308595e checksums ENS01 2024-04-24 10:18:13 From ebd92c3c-1aaf-43c6-ae72-dd649e624ea2 135 23 Processed 8753 -8757 7f200073-0c6e-4487-ac0b-f5cb160151f5 genebuild_web ENS01 2024-04-24 10:18:14 From 2bc8874e-6672-4293-89d6-0b837005177c 135 12 Processed 250 -8758 4bedae91-0f82-478f-bba4-23f8dcb83ef0 thoas_load ENS01 2024-04-24 10:18:14 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 19 Processed 8757 -8759 8389b690-fd8d-40bf-8bb7-05254261be6b genebuild_track ENS01 2024-04-24 10:18:14 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 21 Processed 8757 -8760 e6f310ea-3a25-4adb-aa01-f514fe4d4183 refget_load ENS01 2024-04-24 10:18:14 From 7f200073-0c6e-4487-ac0b-f5cb160151f5 135 24 Processed 8757 -8761 05125758-03b0-43c3-b4eb-973f05293e42 genebuild_compute ENS01 2024-04-24 10:18:14 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 8 Processed 892 -8762 055d833f-a5e3-4dc2-bd19-0827d05a576c xrefs ENS01 2024-04-24 10:18:14 From 05125758-03b0-43c3-b4eb-973f05293e42 6 13 Processed 8761 -8763 966759bd-d77b-4f97-8502-307ba251adc8 protein_features ENS01 2024-04-24 10:18:14 From 05125758-03b0-43c3-b4eb-973f05293e42 6 14 Processed 8761 -8764 154cbdc9-e1de-4d40-9e99-21a6d18cacaf alpha_fold ENS01 2024-04-24 10:18:14 From 05125758-03b0-43c3-b4eb-973f05293e42 6 15 Processed 8761 -8765 9ab20e16-0d40-4145-8ad5-32e498b4cff4 genebuild_files ENS01 2024-04-24 10:18:14 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 9 Processed 892 -8766 0a975f98-3a5d-4270-9770-73cf4c48107b blast ENS01 2024-04-24 10:18:14 From 9ab20e16-0d40-4145-8ad5-32e498b4cff4 6 16 Processed 8765 -8767 4823d7a3-b2c8-4220-8652-20436a20d9ca ftp_dumps ENS01 2024-04-24 10:18:14 From 9ab20e16-0d40-4145-8ad5-32e498b4cff4 6 17 Processed 8765 -8768 a36bfaba-8751-403c-9024-ac00809cb748 genebuild_web ENS01 2024-04-24 10:18:14 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 11 Processed 892 -8769 d781cd94-54f7-403a-8a03-1114db2ccfe6 thoas_dumps ENS01 2024-04-24 10:18:14 From a36bfaba-8751-403c-9024-ac00809cb748 6 18 Processed 
8768 -8770 79d32d79-6346-4453-83d1-517ed275840b browser_files ENS01 2024-04-24 10:18:14 From a36bfaba-8751-403c-9024-ac00809cb748 6 20 Processed 8768 -8771 cd5a8672-9177-4e08-8eb2-8a770ee58ce7 checksums ENS01 2024-04-24 10:18:14 From a36bfaba-8751-403c-9024-ac00809cb748 6 23 Processed 8768 -8772 f6561cb1-4cae-47e7-ac63-ad2151f4b927 genebuild_web ENS01 2024-04-24 10:18:14 From 664088c7-356e-418c-adb2-15945b7ebc4b 6 12 Processed 892 -8773 161453c1-3e3c-423e-a4f4-0f048d7c134c thoas_load ENS01 2024-04-24 10:18:14 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 19 Processed 8772 -8774 7f771283-0afa-4703-b534-3844646bc8e1 genebuild_track ENS01 2024-04-24 10:18:14 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 21 Processed 8772 -8775 00f2c284-5eea-43c0-a225-6bcc319a0b7f refget_load ENS01 2024-04-24 10:18:14 From f6561cb1-4cae-47e7-ac63-ad2151f4b927 6 24 Processed 8772 -8776 da20e2b5-1809-494e-893f-7fb90e8032a1 genebuild_compute ENS01 2024-04-24 10:18:14 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 8 Processed 1006 -8777 8ec9f005-91d7-4015-be09-7b61b6d62c54 xrefs ENS01 2024-04-24 10:18:14 From da20e2b5-1809-494e-893f-7fb90e8032a1 107 13 Processed 8776 -8778 fdd6e615-8ac7-41fc-b8b2-aff7aeb9c99a protein_features ENS01 2024-04-24 10:18:14 From da20e2b5-1809-494e-893f-7fb90e8032a1 107 14 Processed 8776 -8779 f6d9a2a5-d744-4a90-a9b4-8656108bf921 alpha_fold ENS01 2024-04-24 10:18:14 From da20e2b5-1809-494e-893f-7fb90e8032a1 107 15 Processed 8776 -8780 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd genebuild_files ENS01 2024-04-24 10:18:14 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 9 Processed 1006 -8781 656bdb4a-c2f0-4ff1-93a8-6a780ba47e26 blast ENS01 2024-04-24 10:18:14 From 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd 107 16 Processed 8780 -8782 503fe667-0304-45db-ad36-860b9967290e ftp_dumps ENS01 2024-04-24 10:18:14 From 3f66717f-fcc1-4ea6-a6ae-1b038ec5f0cd 107 17 Processed 8780 -8783 472c2bcc-3de5-446b-8b05-e33c3975acdb genebuild_web ENS01 2024-04-24 10:18:14 From 
66db32ae-974f-480c-a60b-63cc49d00f68 107 11 Processed 1006 -8784 83f43633-77ce-4164-8ec4-655707a4029d thoas_dumps ENS01 2024-04-24 10:18:14 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 18 Processed 8783 -8785 722acf20-f184-4ac4-b9ad-947de51b051e browser_files ENS01 2024-04-24 10:18:14 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 20 Processed 8783 -8786 8e15dcb4-cbd2-4971-a155-8d5956a38c41 checksums ENS01 2024-04-24 10:18:14 From 472c2bcc-3de5-446b-8b05-e33c3975acdb 107 23 Processed 8783 -8787 aa4b8122-4480-4595-b2bf-c8c8f51537ce genebuild_web ENS01 2024-04-24 10:18:14 From 66db32ae-974f-480c-a60b-63cc49d00f68 107 12 Processed 1006 -8788 4e7f51fc-b5f6-4aa3-ab31-b22a23d080b2 thoas_load ENS01 2024-04-24 10:18:14 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 19 Processed 8787 -8789 9770c787-1b91-4e65-bf76-5dc0d1c5c75f genebuild_track ENS01 2024-04-24 10:18:14 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 21 Processed 8787 -8790 0f140a16-de49-4566-a41d-8e1ccbc8f5d0 refget_load ENS01 2024-04-24 10:18:14 From aa4b8122-4480-4595-b2bf-c8c8f51537ce 107 24 Processed 8787 -8791 e0a75f02-6ac6-4dfa-8196-50cb4803a9b8 evidence 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 4 Processed 1464 -8792 04892d42-1689-4e76-a158-717f1c773a3d short_variant 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 5 Processed 1464 -8793 4b9774b2-eabd-4981-b098-521b5b8a13a0 browser_files 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 31 Processed 1464 -8794 f8c7383b-aaac-41cf-9ac8-dce5f99b5338 variation_ftp 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 29 Processed 1464 -8795 3aeb9633-f9a6-4693-b51c-875935bb3e16 variation_track 1.0 2024-04-24 10:18:14 From 5c2d6ef7-fe03-4f1a-bcc2-fb72af9ffa46 565 32 Processed 1464 -8796 628143db-4adb-4086-9168-1f7e875bbafe evidence 1.0 2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 4 Processed 1470 -8797 0c150044-3236-4cbd-ba06-19e19d10000a short_variant 1.0 
2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 5 Processed 1470 -8798 e488f2c0-8926-4cd4-a3f4-5085885273ba browser_files 1.0 2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 31 Processed 1470 -8799 0b1d6792-050b-461b-b6e7-8013f03caace variation_ftp 1.0 2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 29 Processed 1470 -8800 b55eedc6-c8e9-46f9-8f3a-5487b590d563 variation_track 1.0 2024-04-24 10:18:14 From 69f38cd5-5774-4b29-9ef4-181441ca0eb8 571 32 Processed 1470 -8801 4f615d55-8b10-4004-88c0-169e1016032c evidence 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 4 Processed 1491 -8802 9bdb03db-aed4-41af-be5c-7912d5cf82ad short_variant 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 5 Processed 1491 -8803 1bc869fb-7586-4394-8da3-40502ce06f28 browser_files 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 31 Processed 1491 -8804 0d05a7a3-6a4d-4f61-bcfe-76728865fe0a variation_ftp 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 29 Processed 1491 -8805 3f87e8e7-21f6-441c-83a4-a5c7204c5320 variation_track 1.0 2024-04-24 10:18:14 From a4c858c4-2e55-43c9-af8b-7adc7ce24c58 592 32 Processed 1491 -8806 206879dd-6d27-451d-b5aa-8330696afc1a evidence 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 4 Processed 1494 -8807 1f82e0d6-2232-4dc7-9dba-c1c62b42c24f short_variant 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 5 Processed 1494 -8808 4c9ad5d4-3a35-40b7-ba30-a09b958a205b browser_files 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 31 Processed 1494 -8809 1c37a8bf-43d6-4fc8-98d8-b4e0d3a31931 variation_ftp 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 29 Processed 1494 -8810 460598d9-1266-4652-8ef8-536e8c1391fe variation_track 1.0 2024-04-24 10:18:14 From 79e1a7b8-85dc-4bfe-b606-6992ca109ede 595 32 Processed 1494 -8811 
6c28c1ae-af07-4199-9da9-b48ae0fcb7b7 homology_compute 1.0 2024-04-24 10:18:14 From 35f56606-9186-432f-b033-6e7204708f3b 294 25 Processed 2319 -8812 4d9b87ea-6313-4aee-ad8b-ab2a2813e22a homology_load 1.0 2024-04-24 10:18:14 From 35f56606-9186-432f-b033-6e7204708f3b 294 26 Processed 2319 -8813 9e3a4f8a-95f2-415c-88ec-5da6eb3e7c10 homology_ftp 1.0 2024-04-24 10:18:14 From 35f56606-9186-432f-b033-6e7204708f3b 294 27 Processed 2319 -8814 cb8f93b0-b903-49c8-ad48-1cc4f4ceee6a homology_compute 1.0 2024-04-24 10:18:14 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 25 Processed 6593 -8815 9045fc64-a2f2-4cfa-b10e-8b55e6e631e2 homology_load 1.0 2024-04-24 10:18:14 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 26 Processed 6593 -8816 b9c88135-824b-425b-ab1b-156cd58a0bde homology_ftp 1.0 2024-04-24 10:18:15 From 8ebbce8e-dcc7-49f8-b520-4d479aef2a65 457 27 Processed 6593 -8817 fd821999-6a13-407c-b6ae-bda323fc1795 homology_compute 1.0 2024-04-24 10:18:15 From 60fae37a-afe2-4bb1-9da0-321911dd2856 266 25 Processed 6623 -8818 f9634579-f88a-4892-b2c1-1762da95b69e homology_load 1.0 2024-04-24 10:18:15 From 60fae37a-afe2-4bb1-9da0-321911dd2856 266 26 Processed 6623 -8819 5bce46a4-6eea-4b4d-8843-681ed932e251 homology_ftp 1.0 2024-04-24 10:18:15 From 60fae37a-afe2-4bb1-9da0-321911dd2856 266 27 Processed 6623 -8820 1e3c7c73-8748-4b80-afc5-37a4045e0f71 homology_compute 1.0 2024-04-24 10:18:15 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 25 Processed 6699 -8821 0c048acb-d4e2-441f-8d8c-86b83e9bf23d homology_load 1.0 2024-04-24 10:18:15 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 26 Processed 6699 -8822 021b89f1-0f7f-4478-97aa-0accd0a7606e homology_ftp 1.0 2024-04-24 10:18:15 From 06debe4a-2b3e-4fab-991e-26863dbe8af8 323 27 Processed 6699 -8823 c929d7cd-aa29-4605-863d-60072b9eccc8 homology_compute 1.0 2024-04-24 10:18:15 From f3abf167-6a8f-45cc-b753-22a955123758 387 25 Processed 6896 -8824 ecdd3f2d-74ba-4a2b-96e9-c43bce42e266 homology_load 1.0 2024-04-24 10:18:15 From 
f3abf167-6a8f-45cc-b753-22a955123758 387 26 Processed 6896 -8825 f7834e60-f18a-4ec6-b8c3-def92135f691 homology_ftp 1.0 2024-04-24 10:18:15 From f3abf167-6a8f-45cc-b753-22a955123758 387 27 Processed 6896 -8826 2f3e7f30-2c95-4bcf-a02f-3f8819ab6562 homology_compute 1.0 2024-04-24 10:18:15 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 25 Processed 7069 -8827 afdc661a-4e30-411f-b453-14ecf1973672 homology_load 1.0 2024-04-24 10:18:15 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 26 Processed 7069 -8828 f9fa5df3-e813-4667-aea2-2df4d67ffdb4 homology_ftp 1.0 2024-04-24 10:18:15 From 9681f4c2-afb4-4a08-8e4d-f26363f65ddf 369 27 Processed 7069 -8829 85b782ae-d1a6-4bee-8a48-3dae3c9da7aa homology_compute 1.0 2024-04-24 10:18:15 From d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 25 Processed 7177 -8830 3b89fcc0-1809-4de6-ae4e-e82bf34c26bf homology_load 1.0 2024-04-24 10:18:15 From d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 26 Processed 7177 -8831 e1a9a2ed-318a-4e57-9136-ecb33082d71c homology_ftp 1.0 2024-04-24 10:18:15 From d57040b6-0ef5-4e6b-97ef-be0ad94d3a61 424 27 Processed 7177 -8832 003c2932-ba1d-47b8-8f32-88b27bd79d87 homology_compute 1.0 2024-04-24 10:18:15 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 25 Processed 7320 -8833 9fa37967-8ace-4f6d-925b-dce75a70ab79 homology_load 1.0 2024-04-24 10:18:15 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 26 Processed 7320 -8834 94360425-5f48-4bc9-8863-2011b3e8115d homology_ftp 1.0 2024-04-24 10:18:15 From c4f0eec5-f4b3-4cef-8369-7b13932509e1 284 27 Processed 7320 -8835 9c874e53-bf5c-4c49-8a31-123c1025a7a6 homology_compute 1.0 2024-04-24 10:18:15 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 25 Processed 7535 -8836 ff6239d9-bd90-49f9-b3a5-0cee348267b7 homology_load 1.0 2024-04-24 10:18:15 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 26 Processed 7535 -8837 b7b5b776-30f0-469f-bd19-707ba2fa8f3c homology_ftp 1.0 2024-04-24 10:18:15 From d51ab85a-f037-47a3-ba53-423ad8e42669 383 27 Processed 7535 -8838 18783f74-ffd9-4e46-90bd-f18d7ca00896 
homology_compute 1.0 2024-04-24 10:18:15 From 254a68c7-f512-446d-a958-983a2713daf2 359 25 Processed 7603 -8839 e2ea6dca-fd18-4cc5-8120-d724bea5a5bd homology_load 1.0 2024-04-24 10:18:15 From 254a68c7-f512-446d-a958-983a2713daf2 359 26 Processed 7603 -8840 86288d80-6d0e-4da8-9c3c-a2a78b740f63 homology_ftp 1.0 2024-04-24 10:18:15 From 254a68c7-f512-446d-a958-983a2713daf2 359 27 Processed 7603 -8841 c40f1ff1-2da6-4569-9c95-dae534bdb59c homology_compute 1.0 2024-04-24 10:18:15 From 9f2a7c92-e04a-443f-a991-1481a9466456 313 25 Processed 7785 -8842 9e45ec23-6f77-40b3-b487-717981b6789a homology_load 1.0 2024-04-24 10:18:15 From 9f2a7c92-e04a-443f-a991-1481a9466456 313 26 Processed 7785 -8843 ea7e6915-8d77-4eee-bb3c-35e93a41e7e9 homology_ftp 1.0 2024-04-24 10:18:15 From 9f2a7c92-e04a-443f-a991-1481a9466456 313 27 Processed 7785 -8844 3c7d8319-a57c-447b-b47f-5a1873a0b018 homology_compute 1.0 2024-04-24 10:18:15 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 25 Processed 7820 -8845 b9dac26c-ecc2-4ec4-a8ac-d240cd0b748e homology_load 1.0 2024-04-24 10:18:15 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 26 Processed 7820 -8846 c2772f23-1c2f-4e55-8453-b6985475a629 homology_ftp 1.0 2024-04-24 10:18:15 From 681ceca3-3336-4b92-ac11-43b5fcabec62 251 27 Processed 7820 -8847 7e892555-9d54-479e-9d2c-1fab7e45c03f homology_compute 1.0 2024-04-24 10:18:15 From d78259af-f491-42de-9cbf-de744b09efee 332 25 Processed 7847 -8848 3e2a34c6-cd2b-4e1f-bac4-9acf67254bf8 homology_load 1.0 2024-04-24 10:18:15 From d78259af-f491-42de-9cbf-de744b09efee 332 26 Processed 7847 -8849 4b97ed95-560c-4a4c-95c1-44b654ad0fa0 homology_ftp 1.0 2024-04-24 10:18:15 From d78259af-f491-42de-9cbf-de744b09efee 332 27 Processed 7847 -8850 10e0efaa-e444-4e8d-927e-a761aa30969f homology_compute 1.0 2024-04-24 10:18:15 From b6472939-9e49-4d46-b93e-304910acabf3 4352 25 Processed 8392 -8851 369f5f6b-763f-44ea-8f13-862c06461346 homology_load 1.0 2024-04-24 10:18:15 From b6472939-9e49-4d46-b93e-304910acabf3 4352 26 Processed 8392 
-8852 f04c6b1b-4458-4d28-aa47-b43ea12b08c9 homology_ftp 1.0 2024-04-24 10:18:15 From b6472939-9e49-4d46-b93e-304910acabf3 4352 27 Processed 8392 -8853 9ca4293d-6710-4903-9fc1-8417265ce27d genebuild_compute ENS01 2024-04-24 10:18:15 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 8 Processed 178 -8854 570f2d24-a2c9-4bb2-895f-8653c0def936 xrefs ENS01 2024-04-24 10:18:15 From 9ca4293d-6710-4903-9fc1-8417265ce27d 97 13 Processed 8853 -8855 0f80b03b-1145-4cdd-bf93-9df96b65f160 protein_features ENS01 2024-04-24 10:18:15 From 9ca4293d-6710-4903-9fc1-8417265ce27d 97 14 Processed 8853 -8856 11584801-c419-4acf-bb1f-119303afc0d4 alpha_fold ENS01 2024-04-24 10:18:15 From 9ca4293d-6710-4903-9fc1-8417265ce27d 97 15 Processed 8853 -8857 8ab46307-5632-4b68-9815-83997bf53b94 genebuild_files ENS01 2024-04-24 10:18:15 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 9 Processed 178 -8858 1848bff4-5285-4c3a-abcc-62869d41dd87 blast ENS01 2024-04-24 10:18:15 From 8ab46307-5632-4b68-9815-83997bf53b94 97 16 Processed 8857 -8859 1861572a-1bde-4720-bbfa-951b9d8a4456 ftp_dumps ENS01 2024-04-24 10:18:15 From 8ab46307-5632-4b68-9815-83997bf53b94 97 17 Processed 8857 -8860 2109cb44-362b-4571-94da-67859d194824 genebuild_web ENS01 2024-04-24 10:18:15 From ed8ca387-38e3-4bfe-8b85-757a59b95126 97 11 Processed 178 -8861 2f09c74f-c2cc-4fea-8d86-6e7461b3366d thoas_dumps ENS01 2024-04-24 10:18:15 From 2109cb44-362b-4571-94da-67859d194824 97 18 Processed 8860 -8862 fdca5cd5-9a2f-4c94-bc52-fcdda4070e4e browser_files ENS01 2024-04-24 10:18:15 From 2109cb44-362b-4571-94da-67859d194824 97 20 Processed 8860 -8863 a2a1216d-2cad-41d8-b115-a6711989abd5 checksums ENS01 2024-04-24 10:18:15 From 2109cb44-362b-4571-94da-67859d194824 97 23 Processed 8860 -8864 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 genebuild_compute ENS01 2024-04-24 10:18:16 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 8 Processed 896 -8865 f2e96294-16ce-4575-af45-f9a2c46383d7 xrefs ENS01 2024-04-24 10:18:16 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 13 
Processed 8864 -8866 165a40b7-2eee-4ba4-b4dc-8b6ad2402004 protein_features ENS01 2024-04-24 10:18:16 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 14 Processed 8864 -8867 d3aee3e0-6b4b-43c9-8c52-19a18f91f824 alpha_fold ENS01 2024-04-24 10:18:16 From 7ed8ac66-3af1-419f-ab9c-08d2a40887a2 9 15 Processed 8864 -8868 b66be831-e87b-4bd5-ba89-fc283cc50193 genebuild_files ENS01 2024-04-24 10:18:16 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 9 Processed 896 -8869 79505983-6bf5-4828-8240-4d51a36a3171 blast ENS01 2024-04-24 10:18:16 From b66be831-e87b-4bd5-ba89-fc283cc50193 9 16 Processed 8868 -8870 2b747467-a74c-488d-a900-3f6385c1f26a ftp_dumps ENS01 2024-04-24 10:18:16 From b66be831-e87b-4bd5-ba89-fc283cc50193 9 17 Processed 8868 -8871 b42fdfd2-7bca-4cd1-bee4-46287457ee93 genebuild_web ENS01 2024-04-24 10:18:16 From f9690d7e-26c1-459d-8102-0c4a1a468806 9 11 Processed 896 -8872 1568bc5d-9fba-42df-87b3-52f77a8552b7 thoas_dumps ENS01 2024-04-24 10:18:16 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 18 Processed 8871 -8873 cff11c40-12e8-469d-945f-c1c6ffb852f5 browser_files ENS01 2024-04-24 10:18:16 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 20 Processed 8871 -8874 11e13c3e-5773-4f30-86c1-6c251f9c4c70 checksums ENS01 2024-04-24 10:18:16 From b42fdfd2-7bca-4cd1-bee4-46287457ee93 9 23 Processed 8871 -8875 e79f65ba-08a5-4aca-b3b9-08ff7c36ba70 evidence 1.0 2024-04-24 10:18:16 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 4 Processed 1496 -8876 7d15f5de-2e98-44b0-ba74-9c70bfd450c6 short_variant 1.0 2024-04-24 10:18:16 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 5 Processed 1496 -8877 ea922b02-b8fe-4f7c-ac4d-a133acc5f532 browser_files 1.0 2024-04-24 10:18:16 From 0c1cdfea-cb10-4a08-8f75-4158658d6a02 597 31 Processed 1496 -8878 7a89be81-4103-4eb8-98b2-23e96e0c4f76 genebuild_compute EXT01 2024-04-24 10:18:16 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 8 Released 2 -8879 20c06eab-391d-4b06-943c-0754f0fef146 xrefs EXT01 2024-04-24 10:18:16 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 13 
Released 8878 -8880 a29813e0-c950-40fc-b970-a360a10a15b6 protein_features EXT01 2024-04-24 10:18:16 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 14 Released 8878 -8881 5c0b59d2-faf4-4297-b10d-c304e1f55998 alpha_fold EXT01 2024-04-24 10:18:16 From 7a89be81-4103-4eb8-98b2-23e96e0c4f76 1 15 Released 8878 -8882 8d33dbd0-93d9-4279-bdfe-21f756afc898 genebuild_files EXT01 2024-04-24 10:18:16 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 9 Released 2 -8883 7e0ec09a-0ed9-461f-abb4-bb8de9f9b842 blast EXT01 2024-04-24 10:18:16 From 8d33dbd0-93d9-4279-bdfe-21f756afc898 1 16 Released 8882 -8884 551593dc-42ad-45ec-8311-c052330feaac ftp_dumps EXT01 2024-04-24 10:18:16 From 8d33dbd0-93d9-4279-bdfe-21f756afc898 1 17 Released 8882 -8885 7b5a6b07-d345-479f-95a0-c9a6712eb747 genebuild_web EXT01 2024-04-24 10:18:16 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 11 Released 2 -8886 711ea653-09ba-47da-b6ff-585c25548546 thoas_dumps EXT01 2024-04-24 10:18:16 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 18 Released 8885 -8887 1f7df867-7a26-4b59-98cb-866e63a215f9 browser_files EXT01 2024-04-24 10:18:16 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 20 Released 8885 -8888 34d43bd3-3061-43ac-b0ab-7eb4d7edd3b3 checksums EXT01 2024-04-24 10:18:16 From 7b5a6b07-d345-479f-95a0-c9a6712eb747 1 23 Released 8885 -8889 fd8f0c4f-59c6-4d40-8dc9-1784f312b935 genebuild_web EXT01 2024-04-24 10:18:16 From cc3c7f95-b5dc-4cc1-aa15-2817c89bd1e2 1 12 Released 2 -8890 2b3158ad-aedc-464d-bad1-7dc448a1623a thoas_load EXT01 2024-04-24 10:18:16 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 19 Released 8889 -8891 62b23f57-bc61-4ade-aa1d-77ecf7f8b18c genebuild_track EXT01 2024-04-24 10:18:16 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 21 Released 8889 -8892 620df1db-d908-4406-9f1d-e97b11c798c7 refget_load EXT01 2024-04-24 10:18:16 From fd8f0c4f-59c6-4d40-8dc9-1784f312b935 1 24 Released 8889 -8893 35485780-095a-44ad-a85b-ed37aff1f5ac genebuild_compute EXT01 2024-04-24 10:18:16 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 8 Released 
24 -8894 7760299d-bc48-4424-82ba-d069153212a1 xrefs EXT01 2024-04-24 10:18:16 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 13 Released 8893 -8895 95b0c4a8-15fe-4d7e-b07a-3ebc08a95ddf protein_features EXT01 2024-04-24 10:18:16 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 14 Released 8893 -8896 256fe2d6-3e6f-4c89-9d46-1616de1bac53 alpha_fold EXT01 2024-04-24 10:18:16 From 35485780-095a-44ad-a85b-ed37aff1f5ac 14 15 Released 8893 -8897 273822b7-89dd-4eef-a0d7-c6aae2322939 genebuild_files EXT01 2024-04-24 10:18:16 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 9 Released 24 -8898 c090e67c-df71-401f-a90c-1fca8aee34fe blast EXT01 2024-04-24 10:18:16 From 273822b7-89dd-4eef-a0d7-c6aae2322939 14 16 Released 8897 -8899 c51e1970-4e48-41cc-8955-be9172cf5f23 ftp_dumps EXT01 2024-04-24 10:18:16 From 273822b7-89dd-4eef-a0d7-c6aae2322939 14 17 Released 8897 -8900 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 genebuild_web EXT01 2024-04-24 10:18:16 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 11 Released 24 -8901 d5e0ebee-7117-494d-bc1b-6d2c70d9491c thoas_dumps EXT01 2024-04-24 10:18:16 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 18 Released 8900 -8902 6f8f8f0b-e755-45cc-97eb-d9c182e873b0 browser_files EXT01 2024-04-24 10:18:16 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 20 Released 8900 -8903 ff6ccd65-e644-42e9-8f38-04a8f253bef9 checksums EXT01 2024-04-24 10:18:16 From 1dfc15fc-d7d2-499b-beb4-e6ce0f06f743 14 23 Released 8900 -8904 8945cfec-17e5-48af-83f4-79907740fddd genebuild_web EXT01 2024-04-24 10:18:16 From f202cd36-d0dc-40df-9dd6-a8218e0d1366 14 12 Released 24 -8905 2bdb1dcf-b45b-4d91-82ba-d1c9137ae7b9 thoas_load EXT01 2024-04-24 10:18:16 From 8945cfec-17e5-48af-83f4-79907740fddd 14 19 Released 8904 -8906 a88f003b-d5bf-479f-aeeb-4696a3de728a genebuild_track EXT01 2024-04-24 10:18:16 From 8945cfec-17e5-48af-83f4-79907740fddd 14 21 Released 8904 -8907 fce24263-d668-4daa-9eac-27abdebf3a90 refget_load EXT01 2024-04-24 10:18:16 From 8945cfec-17e5-48af-83f4-79907740fddd 14 24 Released 8904 
-8908 07f1ea6a-bc29-4426-ab28-db9e8df67135 genebuild_compute GENCODE19 2024-04-24 10:18:16 From e456d1c2-eea2-40f1-83ee-31912905b695 40 8 Released 62 -8909 50c5df76-ad83-4cdd-822b-59a0a5a5caaa xrefs GENCODE19 2024-04-24 10:18:16 From 07f1ea6a-bc29-4426-ab28-db9e8df67135 40 13 Released 8908 -8910 f45160df-1af8-4bb4-b52b-099c9f5ce005 protein_features GENCODE19 2024-04-24 10:18:16 From 07f1ea6a-bc29-4426-ab28-db9e8df67135 40 14 Released 8908 -8911 9bb457f3-a4c6-43ea-a5d8-df8193e33e2b alpha_fold GENCODE19 2024-04-24 10:18:16 From 07f1ea6a-bc29-4426-ab28-db9e8df67135 40 15 Released 8908 -8912 1cd2a36c-9459-4aae-add4-594bdf9570ae genebuild_files GENCODE19 2024-04-24 10:18:16 From e456d1c2-eea2-40f1-83ee-31912905b695 40 9 Released 62 -8913 f3ac13c6-0943-45e5-b553-6e6ecd5febb0 blast GENCODE19 2024-04-24 10:18:16 From 1cd2a36c-9459-4aae-add4-594bdf9570ae 40 16 Released 8912 -8914 f817d89d-e6c7-474f-b585-a816c9a19926 ftp_dumps GENCODE19 2024-04-24 10:18:16 From 1cd2a36c-9459-4aae-add4-594bdf9570ae 40 17 Released 8912 -8915 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 genebuild_web GENCODE19 2024-04-24 10:18:16 From e456d1c2-eea2-40f1-83ee-31912905b695 40 11 Released 62 -8916 e055d56a-3878-4ccb-ac8b-56748d103fbd thoas_dumps GENCODE19 2024-04-24 10:18:16 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 18 Released 8915 -8917 9c4831c4-8ee3-4058-8325-54a1d642e0a5 browser_files GENCODE19 2024-04-24 10:18:16 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 20 Released 8915 -8918 8aa89c54-db39-4f69-9c23-78a2f7077548 checksums GENCODE19 2024-04-24 10:18:16 From 6af4ed97-8fde-41f8-9e28-30e4f2ff2d62 40 23 Released 8915 -8919 c0993f4c-eda0-40e3-807d-ad7ea361e285 genebuild_web GENCODE19 2024-04-24 10:18:16 From e456d1c2-eea2-40f1-83ee-31912905b695 40 12 Released 62 -8920 1bf39fbc-6863-4c23-8960-975a027556e6 thoas_load GENCODE19 2024-04-24 10:18:16 From c0993f4c-eda0-40e3-807d-ad7ea361e285 40 19 Released 8919 -8921 3c1f9d35-8013-40ff-98b5-b62c764f284f genebuild_track GENCODE19 2024-04-24 10:18:16 From 
c0993f4c-eda0-40e3-807d-ad7ea361e285 40 21 Released 8919 -8922 36d517a8-f972-4350-a44c-88d04d44286f refget_load GENCODE19 2024-04-24 10:18:16 From c0993f4c-eda0-40e3-807d-ad7ea361e285 40 24 Released 8919 -8923 9687952e-9dc8-4240-aece-a968dadbe909 genebuild_compute EXT01 2024-04-24 10:18:17 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 8 Released 148 -8924 3ed69d61-78ad-4cd0-9244-8337c6896de7 xrefs EXT01 2024-04-24 10:18:17 From 9687952e-9dc8-4240-aece-a968dadbe909 79 13 Released 8923 -8925 90cbc64d-d88f-4bdf-ac6c-99e0837f4253 protein_features EXT01 2024-04-24 10:18:17 From 9687952e-9dc8-4240-aece-a968dadbe909 79 14 Released 8923 -8926 a70bc6ec-1678-487e-89a9-c39b338bc624 alpha_fold EXT01 2024-04-24 10:18:17 From 9687952e-9dc8-4240-aece-a968dadbe909 79 15 Released 8923 -8927 704e4912-c270-4647-99bb-e8789d092949 genebuild_files EXT01 2024-04-24 10:18:17 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 9 Released 148 -8928 b90ce8e9-0ddd-4591-b118-ecce2ab04b37 blast EXT01 2024-04-24 10:18:17 From 704e4912-c270-4647-99bb-e8789d092949 79 16 Released 8927 -8929 935a9cee-4712-47ea-8f56-5aacf09b3883 ftp_dumps EXT01 2024-04-24 10:18:17 From 704e4912-c270-4647-99bb-e8789d092949 79 17 Released 8927 -8930 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 genebuild_web EXT01 2024-04-24 10:18:17 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 11 Released 148 -8931 b4f77b8b-00e0-4977-9c75-5c2d65a07f0d thoas_dumps EXT01 2024-04-24 10:18:17 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 18 Released 8930 -8932 c82b305f-f783-40ab-af1a-8c2375e8816a browser_files EXT01 2024-04-24 10:18:17 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 20 Released 8930 -8933 fcd75ab2-a057-4d7d-ad37-b7604a7d0764 checksums EXT01 2024-04-24 10:18:17 From 26fe8cb4-05b4-4f6c-83d9-a4b34c32ca08 79 23 Released 8930 -8934 a81f1f36-c2e4-498e-ba53-8c8a28759a4c genebuild_web EXT01 2024-04-24 10:18:17 From 287a5483-55a4-46e6-a58b-a84ba0ddacd6 79 12 Released 148 -8935 56fcabc6-a26f-4007-aef7-28440b60a990 thoas_load EXT01 2024-04-24 10:18:17 
From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 19 Released 8934 -8936 f9ef4142-f4c9-4def-84af-c9480934d408 genebuild_track EXT01 2024-04-24 10:18:17 From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 21 Released 8934 -8937 329e59f5-ff09-4e8f-8398-c5bb37d25a16 refget_load EXT01 2024-04-24 10:18:17 From a81f1f36-c2e4-498e-ba53-8c8a28759a4c 79 24 Released 8934 -8938 d732f47d-4783-4cf3-80ee-566347f27fe5 genebuild_compute GENCODE44 2024-04-24 10:18:17 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 8 Released 172 -8939 f0e56fd2-53a7-4a5d-8c6e-a0e425659e2d xrefs GENCODE44 2024-04-24 10:18:17 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 13 Released 8938 -8940 9e50213a-a3dc-4d86-bff2-2c607ea97be6 protein_features GENCODE44 2024-04-24 10:18:17 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 14 Released 8938 -8941 c683f337-fe1a-4080-8fa6-b2f5921c82f5 alpha_fold GENCODE44 2024-04-24 10:18:17 From d732f47d-4783-4cf3-80ee-566347f27fe5 92 15 Released 8938 -8942 a26a6832-2081-4f10-bdec-9034f9682f88 genebuild_files GENCODE44 2024-04-24 10:18:17 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 9 Released 172 -8943 1bbc28e3-55b6-42aa-a806-231f4d645d79 blast GENCODE44 2024-04-24 10:18:17 From a26a6832-2081-4f10-bdec-9034f9682f88 92 16 Released 8942 -8944 6353961a-be42-43f6-be0e-98dff9a0e6fa ftp_dumps GENCODE44 2024-04-24 10:18:17 From a26a6832-2081-4f10-bdec-9034f9682f88 92 17 Released 8942 -8945 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 genebuild_web GENCODE44 2024-04-24 10:18:17 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 11 Released 172 -8946 4f8b970e-054f-48a2-82ee-f638d42cb88c thoas_dumps GENCODE44 2024-04-24 10:18:17 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 18 Released 8945 -8947 893ab5b4-cf72-4ac8-93a9-6b263ef24fec browser_files GENCODE44 2024-04-24 10:18:17 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 20 Released 8945 -8948 0d2e1d80-ec5d-4c83-a777-d0e12dd3a20d checksums GENCODE44 2024-04-24 10:18:17 From 698fc95c-9f61-4a92-8ad8-8dd3f1ec3c77 92 23 Released 8945 -8949 
d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 genebuild_web GENCODE44 2024-04-24 10:18:17 From 949defef-c4d2-4ab1-8a73-f41d2b3c7719 92 12 Released 172 -8950 7cc832cd-2a22-4326-95ec-3b440ad218d0 thoas_load GENCODE44 2024-04-24 10:18:17 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 19 Released 8949 -8951 743e6c8d-5fe5-4417-9673-d807b4b494e8 genebuild_track GENCODE44 2024-04-24 10:18:17 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 21 Released 8949 -8952 775b793f-124c-4b97-8734-cf38aa2e66d4 refget_load GENCODE44 2024-04-24 10:18:17 From d0c2c132-d6e9-4cc6-b7ea-90aae49fdd97 92 24 Released 8949 -8953 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c genebuild_compute EXT01 2024-04-24 10:18:17 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 8 Released 402 -8954 7e303177-8107-4467-affd-60fcb3cb9fe9 xrefs EXT01 2024-04-24 10:18:17 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 13 Released 8953 -8955 5ea5d208-db3d-409a-ad3b-90fc25357975 protein_features EXT01 2024-04-24 10:18:17 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 14 Released 8953 -8956 bde17d27-1505-4d55-81e1-589b2c160758 alpha_fold EXT01 2024-04-24 10:18:17 From 6c9c5c7a-b58a-4fd6-92a2-8c1d2c8e155c 214 15 Released 8953 -8957 a312a91d-c738-44d2-9117-3289689074bf genebuild_files EXT01 2024-04-24 10:18:17 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 9 Released 402 -8958 cabdf704-d7a7-43c6-91b1-ef13643fe743 blast EXT01 2024-04-24 10:18:17 From a312a91d-c738-44d2-9117-3289689074bf 214 16 Released 8957 -8959 35242e52-ac0a-4232-a04e-602712188981 ftp_dumps EXT01 2024-04-24 10:18:17 From a312a91d-c738-44d2-9117-3289689074bf 214 17 Released 8957 -8960 c679cde3-49b9-4b2e-a415-f5e41a2584ad genebuild_web EXT01 2024-04-24 10:18:17 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 11 Released 402 -8961 15392806-6489-4ca8-8dfe-4c068b8cbfc5 thoas_dumps EXT01 2024-04-24 10:18:17 From c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 18 Released 8960 -8962 30b00998-cf48-4e19-87a5-0d77a7d8eab8 browser_files EXT01 2024-04-24 10:18:17 From 
c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 20 Released 8960 -8963 7405d0be-af0e-4a25-909d-79804221ab66 checksums EXT01 2024-04-24 10:18:17 From c679cde3-49b9-4b2e-a415-f5e41a2584ad 214 23 Released 8960 -8964 63902fec-bfaa-49bd-90c7-38910ad11921 genebuild_web EXT01 2024-04-24 10:18:17 From cfef61f8-7e24-4ed6-945f-baca1b2664a3 214 12 Released 402 -8965 84157ee7-4328-4489-a235-062c76cc6bc9 thoas_load EXT01 2024-04-24 10:18:17 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 19 Released 8964 -8966 b1b03468-d47e-4033-b164-e24a77c1419a genebuild_track EXT01 2024-04-24 10:18:17 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 21 Released 8964 -8967 6f136ae5-07b4-457c-9452-b7272490214a refget_load EXT01 2024-04-24 10:18:17 From 63902fec-bfaa-49bd-90c7-38910ad11921 214 24 Released 8964 -8968 0c0887c2-839a-4df9-b9dc-85c7f1605e16 genebuild_compute EXT01 2024-04-24 10:18:17 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 8 Released 406 -8969 8543b0f0-0798-4af5-9ee2-68ab081ce2f9 xrefs EXT01 2024-04-24 10:18:17 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 13 Released 8968 -8970 915f67e9-46aa-419c-b1f1-3e3fe8d37c9f protein_features EXT01 2024-04-24 10:18:17 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 14 Released 8968 -8971 9429df49-33dd-4c4b-bb1e-a086f9be1311 alpha_fold EXT01 2024-04-24 10:18:17 From 0c0887c2-839a-4df9-b9dc-85c7f1605e16 217 15 Released 8968 -8972 b3979b48-55ad-42a3-9f03-b0e6eb1b8408 genebuild_files EXT01 2024-04-24 10:18:17 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 9 Released 406 -8973 4d258abd-6847-486c-9196-bb4da2a13cd0 blast EXT01 2024-04-24 10:18:17 From b3979b48-55ad-42a3-9f03-b0e6eb1b8408 217 16 Released 8972 -8974 fe818538-d329-4e46-b311-71a13f546eb7 ftp_dumps EXT01 2024-04-24 10:18:17 From b3979b48-55ad-42a3-9f03-b0e6eb1b8408 217 17 Released 8972 -8975 537ac1ca-5839-4d21-8f66-815253a29de8 genebuild_web EXT01 2024-04-24 10:18:17 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 11 Released 406 -8976 e6c747ba-6515-447c-9dec-223a5e7b5ab2 thoas_dumps EXT01 
2024-04-24 10:18:17 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 18 Released 8975 -8977 2cd06f2e-8ebb-400a-9cd2-6a313dd67b79 browser_files EXT01 2024-04-24 10:18:17 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 20 Released 8975 -8978 e3282d40-5aec-4970-924c-20fb943324fe checksums EXT01 2024-04-24 10:18:18 From 537ac1ca-5839-4d21-8f66-815253a29de8 217 23 Released 8975 -8979 8d7811aa-63db-480f-b2cd-28fbd0e414e7 genebuild_web EXT01 2024-04-24 10:18:18 From ea69f164-cc77-4671-bf97-c7f537dc400e 217 12 Released 406 -8980 bb86329a-3ff5-4aa1-bccb-fa10866c0400 thoas_load EXT01 2024-04-24 10:18:18 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 19 Released 8979 -8981 58a02fb2-387a-4d93-9798-b8a2ec2a990b genebuild_track EXT01 2024-04-24 10:18:18 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 21 Released 8979 -8982 a9e9630d-7e6c-4a03-8b76-3eea9d58ddd5 refget_load EXT01 2024-04-24 10:18:18 From 8d7811aa-63db-480f-b2cd-28fbd0e414e7 217 24 Released 8979 -8983 b847fdd0-205d-4010-a216-a150eb9dcf62 evidence 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 4 Released 1391 -8984 d6a11f1e-41fd-409b-a42c-6bb5eed4536d short_variant 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 5 Released 1391 -8985 8ba10f26-694b-4e8d-9888-aaa860581af7 variation_ftp 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 29 Released 1391 -8986 62bad9a6-2406-4a93-bc6d-1a3c871fce94 browser_files 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 31 Released 1391 -8987 48d069c5-f099-414d-bac1-b682f78a1fde variation_track 1.0 2024-04-24 10:18:18 From bf1f5064-8520-4f19-84e4-449aa6c1c1e2 673 32 Released 1391 -8988 07a02fee-c19f-4c11-b35a-2e2b51d67378 evidence 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 4 Released 1392 -8989 cc377779-9dc4-4cfb-ba14-81531a7fb69b short_variant 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 5 Released 1392 -8990 d2261995-a526-4728-a6a8-568b17c217d3 
variation_ftp 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 29 Released 1392 -8991 97b81726-34e3-4dbc-976a-ac2e9fa64c17 browser_files 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 31 Released 1392 -8992 49d855db-8570-4a4d-9290-d2fc3b1ba02d variation_track 1.0 2024-04-24 10:18:18 From 5b869bbb-098f-4827-afc0-532a2bc88903 674 32 Released 1392 -8993 a487cac9-c58c-48d9-a0b3-2808346ce541 evidence 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 4 Released 1528 -8994 c5dec575-8720-46f3-ae5d-a86f22760b30 short_variant 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 5 Released 1528 -8995 05e09f88-0d11-47f8-8db0-92714ce6ba42 variation_ftp 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 29 Released 1528 -8996 96da2064-5735-4a91-bd1f-dbeff5548b50 browser_files 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 31 Released 1528 -8997 c50d02a2-16a0-44a2-be1a-32b63ab04deb variation_track 1.0 2024-04-24 10:18:18 From 0a0bed83-72c7-4f8a-a1cb-97450ef82495 644 32 Released 1528 -8998 c1829f04-cccd-436d-9f0a-1d82ed117064 evidence 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 4 Released 1537 -8999 9bf0c2f7-2dc8-4f27-b578-a1cb277e1a63 short_variant 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 5 Released 1537 -9000 f3206998-32e5-465b-8b76-d21e3c24bb18 variation_ftp 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 29 Released 1537 -9001 dd7a4ab7-d890-488e-b014-b6e9eacf8a3a browser_files 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 31 Released 1537 -9002 263fda44-a84a-4879-912e-4de5e75be0ea variation_track 1.0 2024-04-24 10:18:18 From ff7cb333-fc39-4f00-93e0-65a0d5eb596b 653 32 Released 1537 -9003 963a3baf-5f86-4372-b0ce-79726329ce59 evidence 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 4 Released 1544 -9004 
cdabf7a3-e5bf-4afa-ae41-80eeca2ec76d short_variant 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 5 Released 1544 -9005 c708b53e-6fbb-49d9-b9c3-a8a09c3b7f0d variation_ftp 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 29 Released 1544 -9006 029df488-7091-47ed-9db7-e1abf23cc429 browser_files 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 31 Released 1544 -9007 b4863deb-6e3f-4f56-9904-f64cb1783409 variation_track 1.0 2024-04-24 10:18:18 From e659bef9-22f7-4ad2-8215-4a48ecd228df 660 32 Released 1544 -9008 9feb9d70-8966-49f4-a385-3777b66ca2a2 homology_compute 1.0 2024-04-24 10:18:18 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 25 Released 2291 -9009 da2808ff-be03-4b52-aeb8-5415e509a8b1 homology_load 1.0 2024-04-24 10:18:18 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 26 Released 2291 -9010 9aaf2c4c-f026-4c6c-8b65-c475e3be683a homology_ftp 1.0 2024-04-24 10:18:18 From 4b02b11e-397c-4a4f-8c13-8b65efb87030 266 27 Released 2291 -9011 049cc616-bf6e-4f21-b3b3-6ba0672e79ac homology_compute 1.0 2024-04-24 10:18:18 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 25 Released 2348 -9012 0d4335ae-fcd5-4ac4-a1a7-d766c4dad6bc homology_load 1.0 2024-04-24 10:18:18 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 26 Released 2348 -9013 7642b849-ba92-4f47-960c-9dc07b2488c8 homology_ftp 1.0 2024-04-24 10:18:18 From aebf0b81-4234-4aa9-85cc-abfe91f5eac2 323 27 Released 2348 -9014 b2f53226-7a8f-41cc-bef3-f1dc6d7324ad homology_compute 1.0 2024-04-24 10:18:18 From caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 25 Released 2357 -9015 d174a9a1-39df-4e81-94bd-98db30505730 homology_load 1.0 2024-04-24 10:18:18 From caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 26 Released 2357 -9016 23f92c2d-83a6-43de-87c7-a83aad6406a5 homology_ftp 1.0 2024-04-24 10:18:18 From caac6097-4921-4c10-bfc0-1c3e9b2604dc 332 27 Released 2357 -9017 41b03ede-f8f2-4c4c-b1f7-86d506798835 homology_compute 1.0 2024-04-24 10:18:18 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 
359 25 Released 2384 -9018 1e809aac-bfe5-4a89-9472-c2f183205f3b homology_load 1.0 2024-04-24 10:18:18 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 359 26 Released 2384 -9019 8c572ee1-140c-43f4-9c6f-287662793018 homology_ftp 1.0 2024-04-24 10:18:18 From 9f45f1a6-d4d0-4c02-9509-dec5a0d523fb 359 27 Released 2384 -9020 70452894-16f7-4b8c-a780-1dd869bec2fa homology_compute 1.0 2024-04-24 10:18:18 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 25 Released 2394 -9021 ba8bd474-977f-4c52-88b4-7cd99f288f2c homology_load 1.0 2024-04-24 10:18:18 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 26 Released 2394 -9022 8b148701-df21-4cdc-8123-66ba2a7a59e0 homology_ftp 1.0 2024-04-24 10:18:18 From b67e1761-3341-4965-9a5b-041cb8230cb3 369 27 Released 2394 -9023 f1ba169e-f9a4-4a01-80cc-f2e2085a07cb homology_compute 1.0 2024-04-24 10:18:18 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 25 Released 2449 -9024 794e6ba8-771e-4a7e-b851-f1b48f20806f homology_load 1.0 2024-04-24 10:18:19 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 26 Released 2449 -9025 ae16f773-4395-4908-82f1-d5517b75ace5 homology_ftp 1.0 2024-04-24 10:18:19 From e6df4d05-8567-4143-8ea0-c6ad1b5a3fc2 424 27 Released 2449 -9026 d84af6e2-313c-4930-bbac-8b74def7c6b7 homology_compute 1.0 2024-04-24 10:18:19 From 58df568e-48c1-4a3b-838b-448540392f9c 433 25 Released 2458 -9027 b6156c27-8aef-4172-b150-6d6b27b0f4c6 homology_load 1.0 2024-04-24 10:18:19 From 58df568e-48c1-4a3b-838b-448540392f9c 433 26 Released 2458 -9028 73f4fa74-7d7c-4bb4-a71e-1194ef1c244a homology_ftp 1.0 2024-04-24 10:18:19 From 58df568e-48c1-4a3b-838b-448540392f9c 433 27 Released 2458 -9029 078ff308-f035-4c75-aa83-66e0697da057 browser_files 1.0 2024-04-24 10:18:19 From 679d6452-799c-4a2f-8906-0db6c639e498 670 33 Released 2518 -9030 9b6fc878-fa10-49b6-bd99-98ade5e0252f regulation_track 1.0 2024-04-24 10:18:19 From 679d6452-799c-4a2f-8906-0db6c639e498 670 34 Released 2518 -9031 852986f9-e63a-44b7-b182-7182d7070e1f regulation_ftp 1.0 2024-04-24 10:18:19 From 
679d6452-799c-4a2f-8906-0db6c639e498 670 35 Released 2518 -9032 7573b939-da2c-4997-8002-9da717ba79d2 genebuild_compute ENS01 2024-04-24 16:07:22 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 8 Submitted 38 -9033 7bb8919c-d9e0-4eca-9a49-7a6d9e311c8d xrefs ENS01 2024-04-24 16:07:22 From 7573b939-da2c-4997-8002-9da717ba79d2 18 13 Submitted 9032 -9034 a6a43d07-4ddd-4935-96f3-137882be6b5f protein_features ENS01 2024-04-24 16:07:22 From 7573b939-da2c-4997-8002-9da717ba79d2 18 14 Submitted 9032 -9035 3286e886-cdde-45e2-a92c-2a5b7a43744b alpha_fold ENS01 2024-04-24 16:07:22 From 7573b939-da2c-4997-8002-9da717ba79d2 18 15 Submitted 9032 -9036 a41c7eb3-8dd9-4449-bef3-8a2798d324c9 genebuild_files ENS01 2024-04-24 16:07:22 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 9 Submitted 38 -9037 384e30bb-1940-475b-a7f1-94c3b5fa6251 blast ENS01 2024-04-24 16:07:22 From a41c7eb3-8dd9-4449-bef3-8a2798d324c9 18 16 Submitted 9036 -9038 b0d8755a-d01b-4910-b84c-0e15ef1293ba ftp_dumps ENS01 2024-04-24 16:07:22 From a41c7eb3-8dd9-4449-bef3-8a2798d324c9 18 17 Submitted 9036 -9039 3666e777-8cb5-420e-8f45-7469253db5f6 genebuild_web ENS01 2024-04-24 16:07:22 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 11 Submitted 38 -9040 282e982f-493b-4f13-a927-3f9e3dc9a8a8 thoas_dumps ENS01 2024-04-24 16:07:22 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 18 Submitted 9039 -9041 f39ac854-157a-48a8-8b81-4345391c59c3 browser_files ENS01 2024-04-24 16:07:22 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 20 Submitted 9039 -9042 6a86ad7d-67d8-4c0b-a504-966225539fc0 checksums ENS01 2024-04-24 16:07:22 From 3666e777-8cb5-420e-8f45-7469253db5f6 18 23 Submitted 9039 -9043 a128c1b9-6f98-40cf-a3ae-321d5e4e1106 genebuild_compute ENS01 2024-04-24 16:07:22 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 8 Processed 184 -9044 65bacf69-42d4-439c-a436-f76208677771 xrefs ENS01 2024-04-24 16:07:22 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 13 Processed 9043 -9045 d4716792-c4af-4ec9-a14c-220f4768ed88 protein_features ENS01 
2024-04-24 16:07:22 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 14 Processed 9043 -9046 17bc6764-fc30-4fe3-8cfc-18d10e5357d3 alpha_fold ENS01 2024-04-24 16:07:22 From a128c1b9-6f98-40cf-a3ae-321d5e4e1106 100 15 Processed 9043 -9047 7e8844b3-733d-4962-a144-70e8cc69a3a6 genebuild_files ENS01 2024-04-24 16:07:22 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 9 Processed 184 -9048 bec4dc62-aac5-4993-98ef-da92da3c3975 blast ENS01 2024-04-24 16:07:22 From 7e8844b3-733d-4962-a144-70e8cc69a3a6 100 16 Processed 9047 -9049 a187630f-56ea-4012-b10f-96d4eee7e280 ftp_dumps ENS01 2024-04-24 16:07:22 From 7e8844b3-733d-4962-a144-70e8cc69a3a6 100 17 Processed 9047 -9050 372c9ef4-5068-491e-bc9d-f173de3779d4 genebuild_web ENS01 2024-04-24 16:07:22 From 11a0be7f-99ae-45d3-a004-dc19bb562330 100 11 Processed 184 -9051 1f9b9cf6-af64-49da-9f6a-e91fcfe3748b thoas_dumps ENS01 2024-04-24 16:07:22 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 18 Processed 9050 -9052 14224fd8-39fc-4ce4-955a-9ceb53b7fe17 browser_files ENS01 2024-04-24 16:07:22 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 20 Processed 9050 -9053 a2d8c490-6152-4d44-8cd6-6318be80c6d0 checksums ENS01 2024-04-24 16:07:22 From 372c9ef4-5068-491e-bc9d-f173de3779d4 100 23 Processed 9050 -9054 569f3264-1e67-474a-bcca-d1f971bdfb6d genebuild_compute ENS01 2024-04-24 16:07:22 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 8 Processed 338 -9055 2f75afb7-07b9-4f26-914b-447609ae9661 xrefs ENS01 2024-04-24 16:07:22 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 13 Processed 9054 -9056 4030627d-9f93-418b-b162-ef4c4ea7187c protein_features ENS01 2024-04-24 16:07:22 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 14 Processed 9054 -9057 03698609-b0cc-4ebc-ba4e-9c1839c07375 alpha_fold ENS01 2024-04-24 16:07:22 From 569f3264-1e67-474a-bcca-d1f971bdfb6d 179 15 Processed 9054 -9058 5d221954-3022-434b-8167-4837bcb83cdf genebuild_files ENS01 2024-04-24 16:07:22 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 9 Processed 338 -9059 
0a2ed457-8cb8-4c32-9670-f2d29accf899 blast ENS01 2024-04-24 16:07:22 From 5d221954-3022-434b-8167-4837bcb83cdf 179 16 Processed 9058 -9060 b8aa05bd-9375-49aa-bdd4-2cd81dded467 ftp_dumps ENS01 2024-04-24 16:07:22 From 5d221954-3022-434b-8167-4837bcb83cdf 179 17 Processed 9058 -9061 032c5450-032b-4bd2-91e5-8b00482bb51f genebuild_web ENS01 2024-04-24 16:07:22 From bd63a676-45ff-494a-b26f-2b779cb6c180 179 11 Processed 338 -9062 900b59d2-8ac7-4e41-a588-a4314dadfe9d thoas_dumps ENS01 2024-04-24 16:07:22 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 18 Processed 9061 -9063 c813b3e5-9756-4431-86b9-1d78e3242ffc browser_files ENS01 2024-04-24 16:07:22 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 20 Processed 9061 -9064 6677ee8d-e814-4991-87b1-967d752652f0 checksums ENS01 2024-04-24 16:07:22 From 032c5450-032b-4bd2-91e5-8b00482bb51f 179 23 Processed 9061 -9065 b60e4ed3-9260-42fd-bb44-648e2240c0fd homology_compute 1.0 2024-04-24 16:07:22 From f2734f34-36a0-4594-871d-f7f6d317d05a 429 25 Submitted 2454 -9066 ef13256d-516a-475d-9769-8ec0b487c39a homology_compute 1.0 2024-04-24 16:07:22 From f32b7f9a-97fd-41cd-86be-a5fb5becd335 469 25 Processed 2494 -9067 6246ba7c-ae8c-4e66-b696-0aaceb586d75 homology_compute 1.0 2024-04-24 16:07:22 From f93d21ca-9a24-4c31-ae11-b0f8d3deab6d 423 25 Submitted 6849 -9068 23bac8a9-553f-4e00-85f3-2844d6634364 homology_compute 1.0 2024-04-24 16:07:22 From 5b618784-a5ff-46cc-8102-b082ffb6e447 368 25 Submitted 8130 -9069 dc06cef3-40c1-4924-82aa-d95003b033d0 homology_compute 1.0 2024-04-24 16:07:22 From a5bf42be-63c1-4616-9af1-bc03aea92643 443 25 Submitted 8661 -9070 bf1f5064-8520-abcd-84e4-449aa6c1c1e2 variation 2.0 2023-11-09 12:49:25.273751 GRCh38 673 3 Faulty \N -9071 bf1f5064-8520-abcd-84e4-449aa6c221e2 variation 2.0 2023-11-09 12:49:25.273751 GRCh38 673 3 Processing \N -9072 99999999-847e-4742-a68b-18c3ece068aa genebuild ENS01 2023-09-22 15:03:02 GCA_021950905.1_ENS01 18 2 Submitted \N -9073 99999999-da2c-4997-8002-9da717ba79d2 genebuild_compute 
ENS01 2024-04-24 16:07:22 From 2ef7c056-847e-4742-a68b-18c3ece068aa 18 8 Submitted 9072 -9074 99999999-d9e0-4eca-9a49-7a6d9e311c8d xrefs ENS01 2024-04-24 16:07:22 From 7573b939-da2c-4997-8002-9da717ba79d2 18 13 Submitted 9073 diff --git a/src/tests/databases/ensembl_genome_metadata/dataset_attribute.txt b/src/tests/databases/ensembl_genome_metadata/dataset_attribute.txt deleted file mode 100644 index 45c652e7..00000000 --- a/src/tests/databases/ensembl_genome_metadata/dataset_attribute.txt +++ /dev/null @@ -1,2246 +0,0 @@ -201067 GCA_000005845.2 1 1 -201085 1 2 1 -201084 1 3 1 -201078 2013-11 5 1 -201066 ASM584v2 6 1 -201083 50.79 7 1 -201087 1 8 1 -201077 complete genome 9 1 -201071 chromosome:ASM584v2#contig 10 1 -201068 ASM584v2 11 1 -201069 Univ. Wisconsin 12 1 -201070 http://www.ebi.ac.uk/ena/data/view/GCA_000005845 13 1 -201072 0 14 1 -201086 1 16 1 -201081 3977025 17 1 -201074 0 18 1 -201082 4641652 19 1 -201075 ASM584v2 81 1 -201007 938.55 21 2 -201032 1.00 22 2 -201035 1.00 23 2 -201029 937.89 24 2 -201038 939.17 25 2 -201011 939.92 26 2 -201027 1.67 27 2 -201030 939.92 28 2 -201008 4240 29 2 -201031 4239 30 2 -201039 1.00 31 2 -201018 d41d8cd98f00b204e9800998ecf8427e 32 2 -201015 2006-02 33 2 -201016 2018-09 34 2 -201012 toplevel 35 2 -201028 7077 36 2 -201017 import 37 2 -201062 Import 38 2 -201048 1.00 39 2 -201043 269.83 40 2 -201040 269.83 41 2 -201049 269.83 42 2 -201044 2905 43 2 -201020 0 44 2 -201019 0 45 2 -201047 179 46 2 -201046 53 47 2 -201042 179 48 2 -201023 0 49 2 -201041 179 50 2 -201045 1.00 51 2 -201053 1.17 52 2 -201060 783.57 53 2 -201057 1103.18 54 2 -201059 1150.95 55 2 -201051 913.03 56 2 -201055 8622 57 2 -201058 115 58 2 -201050 51 59 2 -201056 134 60 2 -201052 19 61 2 -201054 115 62 2 -201061 1.00 63 2 -201009 42 64 2 -201013 2018-09-UnivWisconsin 65 2 -201034 4242 66 2 -201010 4245 67 2 -201036 3 68 2 -201033 4242 69 2 -201037 1.00 70 2 -201014 EXT01 71 2 -201063 b2992 72 2 -201064 Chromosome:3140311-3140799 73 2 -201025 ENA 84 
2 -201024 https://ebi.ac.uk/ena 85 2 -201026 community 169 2 -201022 179 170 2 -187255 GCA_018473315.1 1 7 -187269 512 3 7 -187272 34159233 4 7 -187254 2021-05 5 7 -187256 HG03540.alt.pat.f1_v2 6 7 -187268 40.83 7 7 -187265 scaffold 9 7 -187257 HG03540.alt.pat.f1_v2 11 7 -187273 1 14 7 -187264 NULL 15 7 -187274 512 16 7 -187266 34182815 17 7 -187271 26 18 7 -187267 3065276644 19 7 -187260 high 74 7 -187258 NCBI 75 7 -187259 INSDC Assembly ID 76 7 -187337 GCA_018469415.1 1 9 -187351 369 3 9 -187354 55482364 4 9 -187336 2021-05 5 9 -187338 HG03516.alt.pat.f1_v2 6 9 -187350 40.85 7 9 -187347 scaffold 9 9 -187339 HG03516.alt.pat.f1_v2 11 9 -187355 1 14 9 -187346 NULL 15 9 -187356 369 16 9 -187348 34407519 17 9 -187353 26 18 9 -187349 3067004974 19 9 -187342 high 74 9 -187340 NCBI 75 9 -187341 INSDC Assembly ID 76 9 -187419 GCA_018469875.1 1 11 -187434 1 2 11 -187433 292 3 11 -187436 60041455 4 11 -187418 2021-05 5 11 -187420 HG02622.pri.mat.f1_v2 6 11 -187432 40.86 7 11 -187429 contig 9 11 -187421 HG02622.pri.mat.f1_v2 11 11 -187428 NULL 15 11 -187438 292 16 11 -187430 34180661 17 11 -187431 3046105980 19 11 -187424 high 74 11 -187422 NCBI 75 11 -187423 INSDC Assembly ID 76 11 -187501 GCA_018505825.1 1 13 -187516 1 2 13 -187515 445 3 13 -187518 23115113 4 13 -187500 2021-05 5 13 -187502 HG02109.pri.mat.f1_v2 6 13 -187514 40.86 7 13 -187511 contig 9 13 -187503 HG02109.pri.mat.f1_v2 11 13 -187510 NULL 15 13 -187520 445 16 13 -187512 34073218 17 13 -187513 3026115826 19 13 -187506 high 74 13 -187504 NCBI 75 13 -187505 INSDC Assembly ID 76 13 -187453 1163.76 21 14 -187459 7.79 22 14 -187462 7.79 23 14 -187456 149.30 24 14 -187465 247.24 25 14 -187449 66114.95 26 14 -187454 6200.76 27 14 -187457 3474.93 28 14 -187452 19809 29 14 -187458 104402 30 14 -187466 5.27 31 14 -187441 2022-08 33 14 -187442 2022-07 34 14 -187447 toplevel 35 14 -187455 2474686 36 14 -187445 projection_build 37 14 -187446 Mapping from reference 38 14 -187478 3.30 39 14 -187472 350.03 40 14 -187468 
21565.87 41 14 -187480 1070.54 42 14 -187473 1375430 43 14 -187477 17068 44 14 -187479 2299 45 14 -187476 24448 46 14 -187475 4 47 14 -187470 5081 48 14 -187467 125785 49 14 -187469 54723 50 14 -187474 2.24 51 14 -187484 2.36 52 14 -187491 354.76 53 14 -187488 4120.64 54 14 -187490 4308.30 55 14 -187482 804.48 56 14 -187486 909446 57 14 -187489 15648 58 14 -187481 2 59 14 -187487 44991 60 14 -187483 25925 61 14 -187485 19066 62 14 -187492 1.22 63 14 -187451 8 64 14 -187440 2022-08-Ensembl 65 14 -187461 813720 66 14 -187450 1247235 67 14 -187463 1087119 68 14 -187460 160116 69 14 -187464 8.08 70 14 -187448 ENS01 71 14 -187444 39 77 14 -187471 15995.34 78 14 -187443 homo_sapiens_core_104_38 79 14 -187494 Ensembl 84 14 -187495 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 14 -187493 ensembl 169 14 -187496 180508 170 14 -187661 GCA_018852615.1 1 17 -187676 1 2 17 -187675 445 3 17 -187678 72699918 4 17 -187660 2021-06 5 17 -187662 HG002.pri.mat.f1_v2 6 17 -187674 40.85 7 17 -187671 scaffold 9 17 -187663 HG002.pri.mat.f1_v2 11 17 -187679 6 14 17 -187670 NULL 15 17 -187680 445 16 17 -187672 34165134 17 17 -187677 372 18 17 -187673 3060609068 19 17 -187666 high 74 17 -187664 NCBI 75 17 -187665 INSDC Assembly ID 76 17 -193584 GCA_000002765.2 1 23 -193597 14 2 23 -193596 14 3 23 -193591 2017-10 5 23 -193585 ASM276v2 6 23 -193595 19.34 7 23 -193602 1 8 23 -193590 complete genome 9 23 -193586 chromosome:ASM276v2#contig 10 23 -193587 ASM276v2 11 23 -193588 Naval Medical Research Institute 12 23 -193589 http://www.ebi.ac.uk/ena/data/view/GCA_000002765 13 23 -193601 14 16 23 -193593 12309897 17 23 -193594 23292622 19 23 -193605 ASM276v2 81 23 -193538 2297.48 21 24 -193544 2.64 22 24 -193547 2.64 23 24 -193541 870.63 24 24 -193550 870.38 25 24 -193534 2569.88 26 24 -193539 167.04 27 24 -193542 2296.23 28 24 -193537 5362 29 24 -193543 5358 30 24 -193551 1.00 31 24 -193526 d41d8cd98f00b204e9800998ecf8427e 32 24 -193527 2002-10 33 24 -193528 2017-10 34 24 
-193529 toplevel 35 24 -193540 30864 36 24 -193530 import 37 24 -193532 Import 38 24 -193563 1.00 39 24 -193557 553.68 40 24 -193553 553.68 41 24 -193565 553.68 42 24 -193558 6175 43 24 -193561 252 46 24 -193560 68 47 24 -193555 252 48 24 -193554 252 50 24 -193559 1.00 51 24 -193569 3.01 52 24 -193576 520.40 53 24 -193573 1665.66 54 24 -193575 48.51 55 24 -193567 1568.01 56 24 -193571 11291 57 24 -193574 153 58 24 -193566 72 59 24 -193572 461 60 24 -193568 308 61 24 -193570 153 62 24 -193577 1.00 63 24 -193536 90 64 24 -193531 2017-10-ENA 65 24 -193546 14139 66 24 -193535 14146 67 24 -193548 8784 68 24 -193545 5362 69 24 -193549 1.00 70 24 -193533 EXT01 71 24 -193581 PF3D7_0532500 72 24 -193582 5:1316342-1316846 73 24 -193578 ENA 84 24 -193579 https://ebi.ac.uk/ena 85 24 -193580 252 170 24 -189538 GCA_021950905.1 1 37 -189553 23 2 37 -189552 514 3 37 -189555 84927121 4 37 -189537 2022-02 5 37 -189539 HG002.pat.cur.20211005 6 37 -189551 40.85 7 37 -189548 chromosome 9 37 -189540 HG002.pat.cur.20211005 11 37 -189556 117 14 37 -189547 NULL 15 37 -189557 514 16 37 -189549 32920283 17 37 -189554 966735 18 37 -189550 2959277077 19 37 -189543 high 74 37 -189541 NCBI 75 37 -189542 INSDC Assembly ID 76 37 -189490 1162.01 21 38 -189496 7.81 22 38 -189499 7.79 23 38 -189493 148.84 24 38 -189502 246.40 25 38 -189486 66612.77 26 38 -189491 6190.13 27 38 -189494 3492.51 28 38 -189489 19027 29 38 -189495 101295 30 38 -189503 5.32 31 38 -189478 2022-08 33 38 -189479 2022-07 34 38 -189484 toplevel 35 38 -189492 2481019 36 38 -189482 projection_build 37 38 -189483 Mapping from reference 38 38 -189515 3.30 39 38 -189509 347.19 40 38 -189505 21661.31 41 38 -189517 1062.19 42 38 -189510 1375441 43 38 -189514 16813 44 38 -189516 2218 45 38 -189513 23848 46 38 -189512 4 47 38 -189507 4817 48 38 -189504 123368 49 38 -189506 53525 50 38 -189511 2.24 51 38 -189521 2.41 52 38 -189528 349.77 53 38 -189525 4335.99 54 38 -189527 4378.19 55 38 -189519 809.54 56 38 -189523 909725 57 38 -189526 
15206 58 38 -189518 7 59 38 -189524 44601 60 38 -189520 26063 61 38 -189522 18538 62 38 -189529 1.22 63 38 -189488 8 64 38 -189477 2022-10-Ensembl 65 38 -189498 790726 66 38 -189487 1213018 67 38 -189500 1057228 68 38 -189497 155790 69 38 -189501 8.19 70 38 -189485 ENS01 71 38 -189481 51 77 38 -189508 16006.39 78 38 -189480 homo_sapiens_core_104_38 79 38 -189531 Ensembl 84 38 -189532 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 38 -189530 ensembl 169 38 -189533 176893 170 38 -192935 GCA_000001405.14 1 61 -192946 25 2 61 -192945 27948 3 61 -192948 38440852 4 61 -192931 2009-02 5 61 -192930 GRCh37 6 61 -192944 37.78 7 61 -192951 chromosome 9 61 -192933 chromosome:GRCh37#chromosome:GRCh38 10 61 -192932 GRCh37.p13 11 61 -192953 GENCODE 12 61 -192949 242 14 61 -192950 297 16 61 -192942 33914472 17 61 -192947 243146473 18 61 -192943 3234834689 19 61 -192940 hg19 20 61 -192936 high 74 61 -192937 NCBI 75 61 -192938 GenBank Assembly ID 76 61 -192934 Genome Reference Consortium Human Reference 37 80 61 -192939 GRCh37 81 61 -192886 1141.09 21 62 -192892 7.60 22 62 -192895 7.34 23 62 -192889 150.21 24 62 -192898 231.84 25 62 -192882 63989.10 26 62 -192887 6220.21 27 62 -192890 3136.38 28 62 -192885 20787 29 62 -192891 95346 30 62 -192899 4.59 31 62 -192875 2011-04 33 62 -192876 2013-09 34 62 -192878 toplevel 35 62 -192888 2304638 36 62 -192877 full_genebuild 37 62 -192880 Manual annotation 38 62 -192911 2.51 39 62 -192905 289.17 40 62 -192901 15206.48 41 62 -192913 712.20 42 62 -192906 1536213 43 62 -192910 13860 44 62 -192912 2038 45 62 -192909 22948 46 62 -192908 35 47 62 -192903 7050 48 62 -192900 49857 49 62 -192902 32971 50 62 -192907 1.44 51 62 -192917 2.31 52 62 -192924 362.52 53 62 -192921 3525.30 54 62 -192923 3049.46 55 62 -192915 809.46 56 62 -192919 586570 57 62 -192922 14170 58 62 -192914 23 59 62 -192920 40183 60 62 -192916 22805 61 62 -192918 17378 62 62 -192925 1.23 63 62 -192884 8 64 62 -192874 2010-07-Ensembl 65 62 -192894 724218 66 
62 -192883 1072753 67 62 -192896 926601 68 62 -192893 146152 69 62 -192897 7.03 70 62 -192881 GENCODE19 71 62 -192927 ENSG00000139618 72 62 -192928 17:64155265-64255266 73 62 -192873 39 77 62 -192904 12156.05 78 62 -192879 2013-07-22 82 62 -192926 82828 170 62 -197309 GCA_900519105.1 1 147 -197323 22 2 147 -197322 22 3 147 -197325 51842 4 147 -197310 2018-07 5 147 -197311 IWGSC 6 147 -197321 45.18 7 147 -197329 1 8 147 -197316 chromosome 9 147 -197312 chromosome:IWGSC#scaffold:TGACv1 10 147 -197313 iwgsc_refseqv1.0 11 147 -197314 International Wheat Genome Sequencing Consortium 12 147 -197315 https://www.ebi.ac.uk/ena/data/view/GCA_900519105.1 13 147 -197326 692976 14 147 -197327 22 16 147 -197319 133312441 17 147 -197324 275682619 18 147 -197320 14547261565 19 147 -197328 IWGSC 81 147 -197261 1332.42 21 148 -197267 5.34 22 148 -197270 5.60 23 148 -197264 249.47 24 148 -197273 303.33 25 148 -197257 3488.91 26 148 -197262 491.21 27 148 -197265 1570.98 28 148 -197260 107891 29 148 -197266 133346 30 148 -197274 1.24 31 148 -197252 c1cecf20d9c2ae3e2f568924e212ab27 32 148 -197253 toplevel 35 148 -197263 124945 36 148 -197254 import 37 148 -197301 Import 38 148 -197286 1.00 39 148 -197280 149.42 40 148 -197276 149.42 41 148 -197288 149.42 42 148 -197281 5792 43 148 -197285 362 44 148 -197284 12853 46 148 -197283 42 47 148 -197278 12491 48 148 -197277 12853 50 148 -197282 1.00 51 148 -197259 54 64 148 -197255 2018-04-IWGSC 65 148 -197269 712204 66 148 -197258 749233 67 148 -197271 615489 68 148 -197268 133744 69 148 -197272 1.24 70 148 -197256 EXT01 71 148 -197306 TraesCS3D02G273600 72 148 -197307 3D:2585940-2634711 73 148 -197303 PGSB 84 148 -197304 https://www.helmholtz-munich.de/en/pgsb 85 148 -197302 community 169 148 -197305 12853 170 148 -193402 GCA_000001405.29 1 171 -193432 25 2 171 -193430 36829 3 171 -193436 54806562 4 171 -193396 2013-12 5 171 -193400 GRCh38 6 171 -193428 38.88 7 171 -193412 1 8 171 -193420 chromosome 9 171 -193392 
chromosome:NCBI36#chromosome:NCBI35 10 171 -193397 GRCh38.p14 11 171 -193414 Genome Reference Consortium 12 171 -193416 https://www.ncbi.nlm.nih.gov/grc 13 171 -193438 663 14 171 -193440 709 16 171 -193424 34493611 17 171 -193434 161611139 18 171 -193426 3298912062 19 171 -193410 hg38 20 171 -193393 high 74 171 -193404 NCBI 75 171 -193406 GenBank Assembly ID 76 171 -193408 Genome Reference Consortium Human Build 38 80 171 -193418 GRCh38 81 171 -193304 1191.97 21 172 -193316 7.98 22 172 -193321 8.13 23 172 -193310 149.38 24 172 -193326 250.15 25 172 -193296 67396.48 26 172 -193306 6172.48 27 172 -193312 3566.92 28 172 -193302 20481 29 172 -193314 111076 30 172 -193329 5.42 31 172 -193284 2014-07 33 172 -193281 2023-03 34 172 -193292 toplevel 35 172 -193308 2473539 36 172 -193286 full_genebuild 37 172 -193290 Manual annotation 38 172 -193353 3.50 39 172 -193340 339.13 40 172 -193334 22981.34 41 172 -193357 967.28 42 172 -193342 1375317 43 172 -193351 18874 44 172 -193355 2221 45 172 -193349 25959 46 172 -193346 41 47 172 -193337 4864 48 172 -193331 160555 49 172 -193335 64262 50 172 -193344 2.48 51 172 -193366 2.11 52 172 -193380 371.37 53 172 -193374 3412.92 54 172 -193378 4117.36 55 172 -193361 725.47 56 172 -193370 909387 57 172 -193376 15239 58 172 -193360 23 59 172 -193373 35229 60 172 -193363 18526 61 172 -193368 16703 62 172 -193382 1.10 63 172 -193300 8 64 172 -193283 2014-01-Ensembl 65 172 -193320 886243 66 172 -193298 1388435 67 172 -193323 1217602 68 172 -193318 170833 69 172 -193325 8.34 70 172 -193294 GENCODE44 71 172 -193385 ENSG00000221914 72 172 -193387 8:26291508-26372680 73 172 -193279 39 77 172 -193339 14932.57 78 172 -193288 19-12-2022 82 172 -193282 ensembl 169 172 -193384 224817 170 172 -199440 GCA_018505865.1 1 177 -199454 481 3 177 -199457 24098322 4 177 -199439 2021-05 5 177 -199441 HG02109.alt.pat.f1_v2 6 177 -199453 40.84 7 177 -199450 scaffold 9 177 -199442 HG02109.alt.pat.f1_v2 11 177 -199458 2 14 177 -199449 NULL 15 177 -199459 481 16 
177 -199451 34164067 17 177 -199456 70 18 177 -199452 3037645976 19 177 -199445 high 74 177 -199443 NCBI 75 177 -199444 INSDC Assembly ID 76 177 -199392 1164.26 21 178 -199398 7.80 22 178 -199401 7.79 23 178 -199395 149.30 24 178 -199404 247.32 25 178 -199388 65963.23 26 178 -199393 6193.22 27 178 -199396 3473.08 28 178 -199391 19876 29 178 -199397 104530 30 178 -199405 5.26 31 178 -199380 2022-08 33 178 -199381 2022-07 34 178 -199386 toplevel 35 178 -199394 2475405 36 178 -199384 projection_build 37 178 -199385 Mapping from reference 38 178 -199417 3.29 39 178 -199411 350.32 40 178 -199407 21621.28 41 178 -199419 1066.54 42 178 -199412 1375866 43 178 -199416 17097 44 178 -199418 2291 45 178 -199415 24484 46 178 -199414 4 47 178 -199409 5096 48 178 -199406 125264 49 178 -199408 54631 50 178 -199413 2.23 51 178 -199423 2.38 52 178 -199430 353.31 53 178 -199427 4232.62 54 178 -199429 4367.32 55 178 -199421 807.42 56 178 -199425 909934 57 178 -199428 15617 58 178 -199420 2 59 178 -199426 45313 60 178 -199422 26240 61 178 -199424 19073 62 178 -199431 1.22 63 178 -199390 8 64 178 -199379 2022-08-Ensembl 65 178 -199400 815082 66 178 -199389 1248811 67 178 -199402 1088549 68 178 -199399 160262 69 178 -199403 8.06 70 178 -199387 ENS01 71 178 -199383 39 77 178 -199410 16095.84 78 178 -199382 homo_sapiens_core_104_38 79 178 -199433 Ensembl 84 178 -199434 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 178 -199432 ensembl 169 178 -199435 179895 170 178 -193993 GCA_018852605.1 1 183 -194007 610 3 183 -194010 84969781 4 183 -193992 2021-06 5 183 -193994 HG002.alt.pat.f1_v2 6 183 -194006 40.87 7 183 -194003 scaffold 9 183 -193995 HG002.alt.pat.f1_v2 11 183 -194011 5 14 183 -194002 NULL 15 183 -194012 610 16 183 -194004 32931770 17 183 -194009 256 18 183 -194005 2958633312 19 183 -193998 high 74 183 -193996 NCBI 75 183 -193997 INSDC Assembly ID 76 183 -193945 1162.00 21 184 -193951 7.80 22 184 -193954 7.78 23 184 -193948 148.90 24 184 -193957 246.44 25 184 
-193941 66617.42 26 184 -193946 6191.91 27 184 -193949 3493.15 28 184 -193944 19028 29 184 -193950 101278 30 184 -193958 5.32 31 184 -193933 2022-08 33 184 -193934 2022-07 34 184 -193939 toplevel 35 184 -193947 2481019 36 184 -193937 projection_build 37 184 -193938 Mapping from reference 38 184 -193970 3.30 39 184 -193964 347.32 40 184 -193960 21645.12 41 184 -193972 1061.66 42 184 -193965 1375441 43 184 -193969 16820 44 184 -193971 2220 45 184 -193968 23857 46 184 -193967 4 47 184 -193962 4817 48 184 -193959 123159 49 184 -193961 53487 50 184 -193966 2.24 51 184 -193976 2.43 52 184 -193983 347.41 53 184 -193980 4352.59 54 184 -193982 4365.72 55 184 -193974 811.39 56 184 -193978 909725 57 184 -193981 15198 58 184 -193973 7 59 184 -193979 45295 60 184 -193975 26650 61 184 -193977 18645 62 184 -193984 1.23 63 184 -193943 8 64 184 -193932 2022-08-Ensembl 65 184 -193953 790264 66 184 -193942 1212568 67 184 -193955 1056783 68 184 -193952 155785 69 184 -193956 8.19 70 184 -193940 ENS01 71 184 -193936 32 77 184 -193963 16006.56 78 184 -193935 homo_sapiens_core_104_38 79 184 -193986 Ensembl 84 184 -193987 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 184 -193985 ensembl 169 184 -193988 176646 170 184 -192770 GCA_018469925.1 1 197 -192784 270 3 197 -192787 51206351 4 197 -192769 2021-05 5 197 -192771 HG02622.alt.pat.f1_v2 6 197 -192783 40.84 7 197 -192780 scaffold 9 197 -192772 HG02622.alt.pat.f1_v2 11 197 -192788 1 14 197 -192779 NULL 15 197 -192789 270 16 197 -192781 34116085 17 197 -192786 27 18 197 -192782 3043426064 19 197 -192775 high 74 197 -192773 NCBI 75 197 -192774 INSDC Assembly ID 76 197 -202291 GCA_018469425.1 1 249 -202306 1 2 249 -202305 320 3 249 -202308 44773628 4 249 -202290 2021-05 5 249 -202292 HG03516.pri.mat.f1_v2 6 249 -202304 40.85 7 249 -202301 contig 9 249 -202293 HG03516.pri.mat.f1_v2 11 249 -202300 NULL 15 249 -202310 320 16 249 -202302 34212357 17 249 -202303 3033479640 19 249 -202296 high 74 249 -202294 NCBI 75 249 
-202295 INSDC Assembly ID 76 249 -202243 1163.20 21 250 -202249 7.79 22 250 -202252 7.79 23 250 -202246 149.22 24 250 -202255 247.21 25 250 -202239 66186.33 26 250 -202244 6201.42 27 250 -202247 3474.20 28 250 -202242 19890 29 250 -202248 104723 30 250 -202256 5.27 31 250 -202231 2022-08 33 250 -202232 2022-07 34 250 -202237 toplevel 35 250 -202245 2475163 36 250 -202235 projection_build 37 250 -202236 Mapping from reference 38 250 -202268 3.30 39 250 -202262 350.95 40 250 -202258 21740.82 41 250 -202270 1075.58 42 250 -202263 1375855 43 250 -202267 17173 44 250 -202269 2288 45 250 -202266 24496 46 250 -202265 4 47 250 -202260 5035 48 250 -202257 125629 49 250 -202259 54633 50 250 -202264 2.23 51 250 -202274 2.37 52 250 -202281 355.21 53 250 -202278 4124.86 54 250 -202280 4302.59 55 250 -202272 809.71 56 250 -202276 909733 57 250 -202279 15708 58 250 -202271 2 59 250 -202277 45203 60 250 -202273 26114 61 250 -202275 19089 62 250 -202282 1.22 63 250 -202241 8 64 250 -202230 2022-08-Ensembl 65 250 -202251 816265 66 250 -202240 1250909 67 250 -202253 1090338 68 250 -202250 160571 69 250 -202254 8.07 70 250 -202238 ENS01 71 250 -202234 32 77 250 -202261 16131.44 78 250 -202233 homo_sapiens_core_104_38 79 250 -202284 Ensembl 84 250 -202285 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 250 -202283 ensembl 169 250 -202286 180262 170 250 -201391 GCA_021951015.1 1 337 -201406 23 2 337 -201405 355 3 337 -201408 62879740 4 337 -201390 2022-02 5 337 -201392 HG002.mat.cur.20211005 6 337 -201404 40.83 7 337 -201401 chromosome 9 337 -201393 HG002.mat.cur.20211005 11 337 -201409 109 14 337 -201400 NULL 15 337 -201410 355 16 337 -201402 34181334 17 337 -201407 1510078 18 337 -201403 3061735012 19 337 -201396 high 74 337 -201394 NCBI 75 337 -201395 INSDC Assembly ID 76 337 -201343 1163.22 21 338 -201349 7.80 22 338 -201352 7.79 23 338 -201346 149.12 24 338 -201355 247.16 25 338 -201339 66336.70 26 338 -201344 6211.72 27 338 -201347 3479.81 28 338 -201342 
19845 29 338 -201348 104818 30 338 -201356 5.28 31 338 -201331 2022-08 33 338 -201332 2022-07 34 338 -201337 toplevel 35 338 -201345 2474673 36 338 -201335 projection_build 37 338 -201336 Mapping from reference 38 338 -201368 3.30 39 338 -201362 350.54 40 338 -201358 21646.34 41 338 -201370 1070.35 42 338 -201363 1374470 43 338 -201367 17156 44 338 -201369 2312 45 338 -201366 24585 46 338 -201365 4 47 338 -201360 5117 48 338 -201357 126258 49 338 -201359 54865 50 338 -201364 2.23 51 338 -201374 2.36 52 338 -201381 356.20 53 338 -201378 4119.05 54 338 -201380 4279.70 55 338 -201372 811.80 56 338 -201376 909485 57 338 -201379 15774 58 338 -201371 9 59 338 -201377 45260 60 338 -201373 26117 61 338 -201375 19143 62 338 -201382 1.21 63 338 -201341 8 64 338 -201330 2022-10-Ensembl 65 338 -201351 817565 66 338 -201340 1252957 67 338 -201353 1092207 68 338 -201350 160750 69 338 -201354 8.10 70 338 -201338 ENS01 71 338 -201334 51 77 338 -201361 16087.71 78 338 -201333 homo_sapiens_core_104_38 79 338 -201384 Ensembl 84 338 -201385 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 338 -201383 ensembl 169 338 -201386 181123 170 338 -203926 GCA_018473295.1 1 347 -203941 1 2 347 -203940 435 3 347 -203943 30474809 4 347 -203925 2021-05 5 347 -203927 HG03540.pri.mat.f1_v2 6 347 -203939 40.85 7 347 -203936 scaffold 9 347 -203928 HG03540.pri.mat.f1_v2 11 347 -203944 1 14 347 -203935 NULL 15 347 -203945 435 16 347 -203937 34126066 17 347 -203942 26 18 347 -203938 3048418776 19 347 -203931 high 74 347 -203929 NCBI 75 347 -203930 INSDC Assembly ID 76 347 -203878 1164.42 21 348 -203884 7.80 22 348 -203887 7.80 23 348 -203881 149.21 24 348 -203890 247.18 25 348 -203874 66207.16 26 348 -203879 6200.13 27 348 -203882 3480.85 28 348 -203877 19790 29 348 -203883 104422 30 348 -203891 5.28 31 348 -203866 2022-08 33 348 -203871 2022-08 34 348 -203870 toplevel 35 348 -203880 2475741 36 348 -203869 projection_build 37 348 -203872 Mapping from reference 38 348 -203903 3.30 39 
348 -203897 349.54 40 348 -203893 21751.23 41 348 -203905 1068.89 42 348 -203898 1375683 43 348 -203902 17074 44 348 -203904 2310 45 348 -203901 24445 46 348 -203900 4 47 348 -203895 5061 48 348 -203892 126140 49 348 -203894 54797 50 348 -203899 2.24 51 348 -203909 2.36 52 348 -203916 355.91 53 348 -203913 4157.09 54 348 -203915 4334.90 55 348 -203907 810.69 56 348 -203911 909713 57 348 -203914 15669 58 348 -203906 2 59 348 -203912 45082 60 348 -203908 26011 61 348 -203910 19071 62 348 -203917 1.22 63 348 -203876 8 64 348 -203865 2022-08-Ensembl 65 348 -203886 814831 66 348 -203875 1248838 67 348 -203888 1088642 68 348 -203885 160196 69 348 -203889 8.09 70 348 -203873 ENS01 71 348 -203868 51 77 348 -203896 16065.48 78 348 -203867 homo_sapiens_core_104_38 79 348 -203919 Ensembl 84 348 -203920 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 348 -203918 ensembl 169 348 -203921 180937 170 348 -199109 GCA_000146045.2 1 401 -199126 16 2 401 -199125 16 3 401 -199111 2011-09 5 401 -199112 R64-1-1 6 401 -199124 38.15 7 401 -199131 1 8 401 -199119 complete genome 9 401 -199114 chromosome:R64-1-1#contig 10 401 -199115 R64-1-1 11 401 -199117 SGD 12 401 -199118 http://www.yeastgenome.org/ 13 401 -199130 16 16 401 -199122 8762001 17 401 -199123 12071326 19 401 -199120 sacCer3 20 401 -199110 high 74 401 -199113 Saccharomyces cerevisiae S288c assembly from Saccharomyces Genome Database 80 401 -199133 R64-1-1 81 401 -199116 2 83 401 -199061 1327.58 21 402 -199067 1.05 22 402 -199070 1.05 23 402 -199064 1267.47 24 402 -199073 1267.47 25 402 -199057 1344.37 26 402 -199062 354.08 27 402 -199065 1327.58 28 402 -199060 6600 29 402 -199066 6600 30 402 -199074 1.00 31 402 -199051 2018-10 33 402 -199052 2018-10 34 402 -199053 toplevel 35 402 -199063 14733 36 402 -199054 import 37 402 -199101 Import 38 402 -199086 1.16 39 402 -199080 171.64 40 402 -199076 208.59 41 402 -199088 198.77 42 402 -199081 5947 43 402 -199084 424 46 402 -199083 58 47 402 -199078 424 48 402 
-199075 67 49 402 -199077 424 50 402 -199082 1.00 51 402 -199092 1.00 52 402 -199099 863.25 53 402 -199096 863.25 54 402 -199090 863.25 56 402 -199094 3147 57 402 -199097 12 58 402 -199089 228 59 402 -199095 12 60 402 -199093 12 62 402 -199100 1.00 63 402 -199059 51 64 402 -199055 2017-01-SGD 65 402 -199069 6913 66 402 -199058 6913 67 402 -199071 313 68 402 -199068 6600 69 402 -199072 1.00 70 402 -199056 EXT01 71 402 -199106 YDL168W 72 402 -199107 VII:786054-786920 73 402 -199050 1 77 402 -199079 62.18 78 402 -199103 SGD 84 402 -199104 https://yeastgenome.org/ 85 402 -199102 community 169 402 -199105 491 170 402 -205458 GCA_000002985.3 1 405 -205470 6 2 405 -205469 3267 3 405 -205475 17493829 4 405 -205457 2012-12 5 405 -205455 WBcel235 6 405 -205468 35.44 7 405 -205476 1 8 405 -205463 complete genome 9 405 -205453 chromosome:WBcel235#chromosome:WBcel215 10 405 -205456 WBcel235 11 405 -205461 WormBase 12 405 -205462 http://www.wormbase.org 13 405 -205474 6 16 405 -205466 24569601 17 405 -205467 100272607 19 405 -205454 high 74 405 -205459 NCBI 75 405 -205460 Genome Assembly ID 76 405 -205472 WBcel235 81 405 -205405 1412.42 21 406 -205411 6.77 22 406 -205414 7.02 23 406 -205408 208.51 24 406 -205417 237.67 25 406 -205401 3224.91 26 406 -205406 397.55 27 406 -205409 1447.28 28 406 -205404 19985 29 406 -205410 31865 30 406 -205418 1.59 31 406 -205395 2014-10 33 406 -205396 2014-10 34 406 -205399 toplevel 35 406 -205407 102756 36 406 -205397 import 37 406 -205445 Import 38 406 -205430 1.02 39 406 -205424 73.00 40 406 -205420 82.61 41 406 -205432 75.03 42 406 -205425 14770 43 406 -205429 294 44 406 -205428 24813 46 406 -205427 17 47 406 -205422 24519 48 406 -205419 526 49 406 -205421 25311 50 406 -205426 1.02 51 406 -205436 4.10 52 406 -205443 228.89 53 406 -205440 1521.23 54 406 -205442 196.66 55 406 -205434 931.43 56 406 -205438 17899 57 406 -205441 2128 58 406 -205433 63 59 406 -205439 8869 60 406 -205435 6704 61 406 -205437 2165 62 406 -205444 1.02 63 406 -205403 30 
64 406 -205394 2022-01-WormBase 65 406 -205413 215849 66 406 -205402 228411 67 406 -205415 195887 68 406 -205412 32524 69 406 -205416 1.63 70 406 -205400 EXT01 71 406 -205450 WBGene00004893 72 406 -205451 X:937766-957832 73 406 -205398 10 77 406 -205423 385.30 78 406 -205447 Wormbase 84 406 -205448 https://wormbase.org/ 85 406 -205446 wormbase 169 406 -205449 25837 170 406 -211872 EXT01 197 406 -187207 1165.04 21 888 -187213 7.80 22 888 -187216 7.80 23 888 -187210 149.33 24 888 -187219 247.32 25 888 -187203 66396.37 26 888 -187208 6207.11 27 888 -187211 3483.73 28 888 -187206 19817 29 888 -187212 104548 30 888 -187220 5.28 31 888 -187195 2022-08 33 888 -187200 2022-08 34 888 -187199 toplevel 35 888 -187209 2474345 36 888 -187198 projection_build 37 888 -187201 Mapping from reference 38 888 -187232 3.29 39 888 -187226 351.62 40 888 -187222 21667.77 41 888 -187234 1072.40 42 888 -187227 1375744 43 888 -187231 17147 44 888 -187233 2317 45 888 -187230 24486 46 888 -187229 4 47 888 -187224 5022 48 888 -187221 124766 49 888 -187223 54475 50 888 -187228 2.22 51 888 -187238 2.38 52 888 -187245 354.81 53 888 -187242 4206.69 54 888 -187244 4322.62 55 888 -187236 812.34 56 888 -187240 909639 57 888 -187243 15765 58 888 -187235 1 59 888 -187241 45760 60 888 -187237 26562 61 888 -187239 19198 62 888 -187246 1.22 63 888 -187205 8 64 888 -187194 2022-08-Ensembl 65 888 -187215 815604 66 888 -187204 1249855 67 888 -187217 1089520 68 888 -187214 160335 69 888 -187218 8.09 70 888 -187202 ENS01 71 888 -187197 51 77 888 -187225 16165.01 78 888 -187196 homo_sapiens_core_104_38 79 888 -187248 Ensembl 84 888 -187249 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 888 -187247 ensembl 169 888 -187250 179241 170 888 -187289 1164.93 21 890 -187295 7.80 22 890 -187298 7.79 23 890 -187292 149.32 24 890 -187301 247.26 25 890 -187285 65822.11 26 890 -187290 6207.47 27 890 -187293 3468.98 28 890 -187288 20001 29 890 -187294 104773 30 890 -187302 5.24 31 890 -187277 2022-08 
33 890 -187278 2022-07 34 890 -187283 toplevel 35 890 -187291 2476271 36 890 -187281 projection_build 37 890 -187282 Mapping from reference 38 890 -187314 3.30 39 890 -187308 350.41 40 890 -187304 21716.60 41 890 -187316 1073.22 42 890 -187309 1375866 43 890 -187313 17159 44 890 -187315 2302 45 890 -187312 24520 46 890 -187311 4 47 890 -187306 5059 48 890 -187303 126332 49 890 -187305 54889 50 890 -187310 2.24 51 890 -187320 2.38 52 890 -187327 352.72 53 890 -187324 4193.05 54 890 -187326 4272.46 55 890 -187318 807.20 56 890 -187322 909729 57 890 -187325 15791 58 890 -187317 2 59 890 -187323 45913 60 890 -187319 26643 61 890 -187321 19270 62 890 -187328 1.22 63 890 -187287 8 64 890 -187276 2022-08-Ensembl 65 890 -187297 817342 66 890 -187286 1251612 67 890 -187299 1091038 68 890 -187296 160574 69 890 -187300 8.03 70 890 -187284 ENS01 71 890 -187280 32 77 890 -187307 16089.73 78 890 -187279 homo_sapiens_core_104_38 79 890 -187330 Ensembl 84 890 -187331 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 890 -187329 ensembl 169 890 -187332 181221 170 890 -211871 ENS01 197 890 -187371 1164.48 21 892 -187377 7.80 22 892 -187380 7.80 23 892 -187374 149.23 24 892 -187383 247.22 25 892 -187367 66405.72 26 892 -187372 6215.09 27 892 -187375 3481.03 28 892 -187370 19835 29 892 -187376 104562 30 892 -187384 5.27 31 892 -187359 2022-08 33 892 -187360 2022-07 34 892 -187365 toplevel 35 892 -187373 2475770 36 892 -187363 projection_build 37 892 -187364 Mapping from reference 38 892 -187396 3.30 39 892 -187390 350.51 40 892 -187386 21697.76 41 892 -187398 1069.47 42 892 -187391 1376351 43 892 -187395 17134 44 892 -187397 2291 45 892 -187394 24493 46 892 -187393 4 47 892 -187388 5068 48 892 -187385 125675 49 892 -187387 54657 50 892 -187392 2.23 51 892 -187402 2.39 52 892 -187409 352.93 53 892 -187406 4189.66 54 892 -187408 4303.41 55 892 -187400 811.77 56 892 -187404 909548 57 892 -187407 15543 58 892 -187399 1 59 892 -187405 45502 60 892 -187401 26498 61 892 
-187403 19004 62 892 -187410 1.22 63 892 -187369 8 64 892 -187358 2022-08-Ensembl 65 892 -187379 815819 66 892 -187368 1250252 67 892 -187381 1089868 68 892 -187378 160384 69 892 -187382 8.09 70 892 -187366 ENS01 71 892 -187362 32 77 892 -187389 16057.02 78 892 -187361 homo_sapiens_core_104_38 79 892 -187412 Ensembl 84 892 -187413 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 892 -187411 ensembl 169 892 -187414 180332 170 892 -187613 1163.59 21 896 -187619 7.80 22 896 -187622 7.80 23 896 -187616 149.14 24 896 -187625 247.20 25 896 -187609 66345.71 26 896 -187614 6214.71 27 896 -187617 3481.97 28 896 -187612 19831 29 896 -187618 104635 30 896 -187626 5.28 31 896 -187601 2022-08 33 896 -187602 2022-07 34 896 -187607 toplevel 35 896 -187615 2474673 36 896 -187605 projection_build 37 896 -187606 Mapping from reference 38 896 -187638 3.30 39 896 -187632 350.64 40 896 -187628 21672.94 41 896 -187640 1071.34 42 896 -187633 1374470 43 896 -187637 17139 44 896 -187639 2316 45 896 -187636 24538 46 896 -187635 4 47 896 -187630 5083 48 896 -187627 126098 49 896 -187629 54765 50 896 -187634 2.23 51 896 -187644 2.38 52 896 -187651 355.44 53 896 -187648 4139.03 54 896 -187650 4279.29 55 896 -187642 814.27 56 896 -187646 909485 57 896 -187649 15732 58 896 -187641 9 59 896 -187647 45635 60 896 -187643 26437 61 896 -187645 19198 62 896 -187652 1.22 63 896 -187611 8 64 896 -187600 2022-08-Ensembl 65 896 -187621 816271 66 896 -187610 1251076 67 896 -187623 1090597 68 896 -187620 160479 69 896 -187624 8.09 70 896 -187608 ENS01 71 896 -187604 32 77 896 -187631 16101.24 78 896 -187603 homo_sapiens_core_104_38 79 896 -187654 Ensembl 84 896 -187655 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 896 -187653 ensembl 169 896 -187656 180863 170 896 -192722 1163.54 21 1006 -192728 7.80 22 1006 -192731 7.79 23 1006 -192725 149.24 24 1006 -192734 247.19 25 1006 -192718 66258.12 26 1006 -192723 6209.25 27 1006 -192726 3474.00 28 1006 -192721 19841 
29 1006 -192727 104668 30 1006 -192735 5.28 31 1006 -192710 2022-08 33 1006 -192711 2022-07 34 1006 -192716 toplevel 35 1006 -192724 2475543 36 1006 -192714 projection_build 37 1006 -192715 Mapping from reference 38 1006 -192747 3.31 39 1006 -192741 350.44 40 1006 -192737 21712.60 41 1006 -192749 1073.34 42 1006 -192742 1377539 43 1006 -192746 17143 44 1006 -192748 2293 45 1006 -192745 24483 46 1006 -192744 4 47 1006 -192739 5047 48 1006 -192736 126127 49 1006 -192738 54685 50 1006 -192743 2.23 51 1006 -192753 2.38 52 1006 -192760 354.98 53 1006 -192757 4115.89 54 1006 -192759 4263.13 55 1006 -192751 814.46 56 1006 -192755 909556 57 1006 -192758 15721 58 1006 -192750 2 59 1006 -192756 45786 60 1006 -192752 26571 61 1006 -192754 19215 62 1006 -192761 1.22 63 1006 -192720 8 64 1006 -192709 2022-08-Ensembl 65 1006 -192730 815962 66 1006 -192719 1250738 67 1006 -192732 1090226 68 1006 -192729 160512 69 1006 -192733 8.09 70 1006 -192717 ENS01 71 1006 -192713 32 77 1006 -192740 16067.69 78 1006 -192712 homo_sapiens_core_104_38 79 1006 -192763 Ensembl 84 1006 -192764 https://rapid.ensembl.org/info/genome/genebuild/full_genebuild.html 85 1006 -192762 ensembl 169 1006 -192765 180812 170 1006 -26649 1047716160 119 1391 -26650 1:230710048:rs699 120 1391 -26651 1042639434 119 1392 -26652 1:230845794:rs699 120 1392 -26653 184169627 123 1393 -211360 23/05/26 182 1393 -211359 23/06/01 183 1393 -26655 161869381 123 1394 -211780 23/06/14 182 1394 -211779 23/06/06 183 1394 -211686 23/05/26 182 1405 -211685 23/06/12 183 1405 -211722 23/06/14 182 1414 -211721 23/06/02 183 1414 -211484 23/06/11 182 1421 -211483 23/06/10 183 1421 -26739 1056117383 119 1464 -26740 JAGYYS010000001.1:2233547:rs1453994370 120 1464 -26751 1057589994 119 1470 -26752 JAHAOO010000001.1:9172:rs765031495 120 1470 -26761 1054338571 119 1475 -26762 JAGYVY010000001.1:710982:rs1737833592 120 1475 -26793 1056666201 119 1491 -26794 JAHAON010000001.1:31416:rs1714786049 120 1491 -26799 1045449578 119 1494 -26800 
JAHEPF010000001.1:187007:rs1378027620 120 1494 -26803 1053740562 119 1496 -26804 JAHEPG010000001.1:5092:rs1002974720 120 1496 -26825 1054887454 119 1507 -26826 JAGYYT010000001.1:2643538:rs1423484253 120 1507 -26829 1044482335 119 1509 -26830 JAGYVX010000001.1:28459:rs1278014195 120 1509 -26867 260526 119 1528 -26868 I:356:s01-356 120 1528 -26885 156317 119 1537 -26886 1:98866:rs3166780949 120 1537 -26899 25626842 119 1544 -26900 1A:58609:1A_58609 120 1544 -28123 91.2 161 2276 -28122 Pan troglodytes 162 2276 -28153 88.9 161 2291 -28152 Pan troglodytes 162 2291 -28209 91.3 161 2319 -28208 Pan troglodytes 162 2319 -28267 16.2 161 2348 -28266 Saccharomyces cerevisiae S288c 162 2348 -28285 27.4 161 2357 -28284 Rattus norvegicus 162 2357 -28339 47.3 161 2384 -28338 Triticum turgidum subsp. durum 162 2384 -28359 19.1 161 2394 -28358 Drosophila melanogaster 162 2394 -28387 90.6 161 2408 -28386 Pan troglodytes 162 2408 -28469 91.6 161 2449 -28468 Pan troglodytes 162 2449 -28479 91.5 161 2454 -28478 Pan troglodytes 162 2454 -28487 10.3 161 2458 -28486 Saccharomyces cerevisiae S288c 162 2458 -28535 91.4 161 2482 -28534 Pan troglodytes 162 2482 -28559 91.3 161 2494 -28558 Pan troglodytes 162 2494 -28609 110623 163 2518 -28610 36597 164 2518 -28611 268483 165 2518 -28612 101734 166 2518 -28613 30873 167 2518 -206984 91.4 161 6593 -206983 Pan troglodytes 162 6593 -207044 88.9 161 6623 -207043 Pan troglodytes 162 6623 -207196 22.7 161 6699 -207195 Paramecium tetraurelia 162 6699 -207496 91.0 161 6849 -207495 Pan troglodytes 162 6849 -207590 91.2 161 6896 -207589 Pan troglodytes 162 6896 -207936 29.5 161 7069 -207935 Strongyloides ratti 162 7069 -208152 91.6 161 7177 -208151 Pan troglodytes 162 7177 -208438 91.0 161 7320 -208437 Pan troglodytes 162 7320 -208868 90.6 161 7535 -208867 Pan troglodytes 162 7535 -209004 70.5 161 7603 -209003 Oryza sativa 162 7603 -209368 91.3 161 7785 -209367 Pan troglodytes 162 7785 -209438 91.2 161 7820 -209437 Pan troglodytes 162 7820 -209492 40.0 
161 7847 -209491 Zymoseptoria tritici 162 7847 -210058 91.5 161 8130 -210057 Pan troglodytes 162 8130 -210582 10.3 161 8392 -210581 Saccharomyces cerevisiae S288c 162 8392 -211120 91.3 161 8661 -211119 Pan troglodytes 162 8661 -211640 23/10/13 182 8662 -211639 23/09/29 183 8662 -211608 23/09/30 182 8663 -211607 23/10/16 183 8663 -211372 23/10/06 182 8664 -211371 23/10/13 183 8664 -211852 23/10/07 182 8665 -211851 23/10/01 183 8665 -211794 23/10/01 182 8666 -211793 23/10/03 183 8666 -211374 23/09/30 182 8667 -211373 23/10/08 183 8667 -211178 23/10/03 182 8668 -211177 23/10/16 183 8668 -211636 23/10/15 182 8669 -211635 23/10/17 183 8669 -211568 23/10/02 182 8670 -211567 23/10/14 183 8670 -211310 23/10/06 182 8671 -211309 23/10/12 183 8671 -211454 23/10/09 182 8672 -211453 23/10/17 183 8672 -211760 23/10/16 182 8673 -211759 23/10/17 183 8673 -211224 23/09/30 182 8674 -211223 23/10/07 183 8674 -211864 23/10/07 182 8675 -211863 23/10/10 183 8675 -211440 23/10/04 182 8676 -211439 23/09/30 183 8676 -211380 23/09/30 182 8677 -211379 23/10/10 183 8677 -211514 23/10/17 182 8678 -211513 23/09/29 183 8678 -211180 23/10/16 182 8679 -211179 23/09/29 183 8679 -211766 23/10/01 182 8680 -211765 23/10/02 183 8680 -211660 23/10/09 182 8681 -211659 23/10/14 183 8681 -211542 23/10/06 182 8682 -211541 23/10/06 183 8682 -211266 23/10/16 182 8683 -211265 23/09/28 183 8683 -211712 23/09/30 182 8684 -211711 23/10/17 183 8684 -211572 23/10/12 182 8685 -211571 23/09/29 183 8685 -211288 23/10/06 182 8686 -211287 23/10/14 183 8686 -211512 23/10/15 182 8687 -211511 23/10/16 183 8687 -211194 23/10/09 182 8688 -211193 23/10/10 183 8688 -211352 23/10/14 182 8689 -211351 23/10/09 183 8689 -211252 23/10/14 182 8690 -211251 23/10/08 183 8690 -211336 23/09/30 182 8691 -211335 23/10/06 183 8691 -211294 23/10/06 182 8692 -211293 23/10/10 183 8692 -211828 23/10/16 182 8693 -211827 23/10/17 183 8693 -211480 23/10/11 182 8694 -211479 23/10/13 183 8694 -211432 23/10/09 182 8695 -211431 23/10/10 183 8695 
-211400 23/10/16 182 8696 -211399 23/09/28 183 8696 -211734 23/10/15 182 8697 -211733 23/10/14 183 8697 -211496 23/10/10 182 8698 -211495 23/09/28 183 8698 -211366 23/10/04 182 8699 -211365 23/10/02 183 8699 -211282 23/10/03 182 8700 -211281 23/10/18 183 8700 -211398 23/10/12 182 8701 -211397 23/10/06 183 8701 -211264 23/10/10 182 8702 -211263 23/10/14 183 8702 -211832 23/10/13 182 8703 -211831 23/10/17 183 8703 -211798 23/09/28 182 8704 -211797 23/09/30 183 8704 -211570 23/10/15 182 8705 -211569 23/10/09 183 8705 -211140 23/10/01 182 8706 -211139 23/10/02 183 8706 -211420 23/10/16 182 8707 -211419 23/10/04 183 8707 -211682 23/10/09 182 8708 -211681 23/10/08 183 8708 -211328 23/10/14 182 8709 -211327 23/10/12 183 8709 -211460 23/09/27 182 8710 -211459 23/10/04 183 8710 -211596 23/10/16 182 8711 -211595 23/09/29 183 8711 -211758 23/09/29 182 8712 -211757 23/10/06 183 8712 -211536 23/10/08 182 8713 -211535 23/10/16 183 8713 -211786 23/10/01 182 8714 -211785 23/10/08 183 8714 -211318 23/10/06 182 8715 -211317 23/10/04 183 8715 -211698 23/10/06 182 8716 -211697 23/10/16 183 8716 -211408 23/10/16 182 8717 -211407 23/10/12 183 8717 -211688 23/10/15 182 8718 -211687 23/10/15 183 8718 -211870 23/10/06 182 8719 -211869 23/10/18 183 8719 -211662 23/10/09 182 8720 -211661 23/10/17 183 8720 -211776 23/10/15 182 8721 -211775 23/10/14 183 8721 -211446 23/10/09 182 8722 -211445 23/10/10 183 8722 -211424 23/10/03 182 8723 -211423 23/10/03 183 8723 -211168 23/10/01 182 8724 -211167 23/10/17 183 8724 -211246 23/10/02 182 8725 -211245 23/10/15 183 8725 -211410 23/10/17 182 8726 -211409 23/10/15 183 8726 -211298 23/10/05 182 8727 -211297 23/10/14 183 8727 -211482 23/10/01 182 8728 -211481 23/10/10 183 8728 -211172 23/10/12 182 8729 -211171 23/09/29 183 8729 -211436 23/10/04 182 8730 -211435 23/10/15 183 8730 -211212 24/04/21 182 8731 -211211 24/04/18 183 8731 -211748 24/04/18 182 8732 -211747 24/04/19 183 8732 -211364 24/04/22 182 8733 -211363 24/04/22 183 8733 -211478 24/04/23 182 
8734 -211477 24/04/18 183 8734 -211488 24/04/20 182 8735 -211487 24/04/21 183 8735 -211506 24/04/19 182 8736 -211505 24/04/21 183 8736 -211376 24/04/23 182 8737 -211375 24/04/18 183 8737 -211236 24/04/22 182 8738 -211235 24/04/22 183 8738 -211308 24/04/21 182 8739 -211307 24/04/23 183 8739 -211718 24/04/19 182 8740 -211717 24/04/22 183 8740 -211738 24/04/22 182 8741 -211737 24/04/19 183 8741 -211554 24/04/19 182 8742 -211553 24/04/20 183 8742 -211620 24/04/20 182 8743 -211619 24/04/21 183 8743 -211610 24/04/21 182 8744 -211609 24/04/19 183 8744 -211182 24/04/19 182 8745 -211181 24/04/22 183 8745 -211528 24/04/20 182 8746 -211527 24/04/22 183 8746 -211770 24/04/22 182 8747 -211769 24/04/20 183 8747 -211486 24/04/21 182 8748 -211485 24/04/21 183 8748 -211588 24/04/19 182 8749 -211587 24/04/18 183 8749 -211752 24/04/21 182 8750 -211751 24/04/23 183 8750 -211234 24/04/17 182 8751 -211233 24/04/21 183 8751 -211270 24/04/21 182 8752 -211269 24/04/20 183 8752 -211810 24/04/17 182 8753 -211809 24/04/20 183 8753 -211650 24/04/21 182 8754 -211649 24/04/23 183 8754 -211594 24/04/18 182 8755 -211593 24/04/23 183 8755 -211730 24/04/18 182 8756 -211729 24/04/18 183 8756 -211474 24/04/22 182 8757 -211473 24/04/21 183 8757 -211330 24/04/22 182 8758 -211329 24/04/21 183 8758 -211490 24/04/19 182 8759 -211489 24/04/20 183 8759 -211802 24/04/21 182 8760 -211801 24/04/20 183 8760 -211134 24/04/21 182 8761 -211133 24/04/21 183 8761 -211136 24/04/22 182 8762 -211135 24/04/21 183 8762 -211558 24/04/21 182 8763 -211557 24/04/22 183 8763 -211186 24/04/21 182 8764 -211185 24/04/20 183 8764 -211576 24/04/22 182 8765 -211575 24/04/19 183 8765 -211148 24/04/22 182 8766 -211147 24/04/23 183 8766 -211316 24/04/20 182 8767 -211315 24/04/21 183 8767 -211622 24/04/23 182 8768 -211621 24/04/21 183 8768 -211764 24/04/23 182 8769 -211763 24/04/23 183 8769 -211452 24/04/22 182 8770 -211451 24/04/22 183 8770 -211732 24/04/18 182 8771 -211731 24/04/21 183 8771 -211830 24/04/19 182 8772 -211829 24/04/21 
183 8772 -211190 24/04/19 182 8773 -211189 24/04/20 183 8773 -211476 24/04/23 182 8774 -211475 24/04/21 183 8774 -211124 24/04/18 182 8775 -211123 24/04/19 183 8775 -211772 24/04/19 182 8776 -211771 24/04/24 183 8776 -211534 24/04/21 182 8777 -211533 24/04/21 183 8777 -211860 24/04/19 182 8778 -211859 24/04/20 183 8778 -211834 24/04/19 182 8779 -211833 24/04/21 183 8779 -211302 24/04/22 182 8780 -211301 24/04/22 183 8780 -211394 24/04/20 182 8781 -211393 24/04/23 183 8781 -211346 24/04/23 182 8782 -211345 24/04/24 183 8782 -211314 24/04/20 182 8783 -211313 24/04/23 183 8783 -211492 24/04/20 182 8784 -211491 24/04/23 183 8784 -211426 24/04/18 182 8785 -211425 24/04/21 183 8785 -211532 24/04/18 182 8786 -211531 24/04/18 183 8786 -211634 24/04/18 182 8787 -211633 24/04/19 183 8787 -211340 24/04/18 182 8788 -211339 24/04/19 183 8788 -211564 24/04/22 182 8789 -211563 24/04/19 183 8789 -211164 24/04/19 182 8790 -211163 24/04/19 183 8790 -211784 24/04/21 182 8791 -211783 24/04/19 183 8791 -211130 24/04/18 182 8792 -211129 24/04/22 183 8792 -211324 24/04/20 182 8793 -211323 24/04/20 183 8793 -211840 24/04/22 182 8794 -211839 24/04/21 183 8794 -211284 24/04/22 182 8795 -211283 24/04/18 183 8795 -211384 24/04/18 182 8796 -211383 24/04/23 183 8796 -211156 24/04/19 182 8797 -211155 24/04/19 183 8797 -211796 24/04/17 182 8798 -211795 24/04/19 183 8798 -211150 24/04/20 182 8799 -211149 24/04/22 183 8799 -211658 24/04/19 182 8800 -211657 24/04/20 183 8800 -211342 24/04/22 182 8801 -211341 24/04/20 183 8801 -211582 24/04/21 182 8802 -211581 24/04/20 183 8802 -211204 24/04/23 182 8803 -211203 24/04/21 183 8803 -211158 24/04/18 182 8804 -211157 24/04/19 183 8804 -211304 24/04/20 182 8805 -211303 24/04/22 183 8805 -211226 24/04/17 182 8806 -211225 24/04/22 183 8806 -211222 24/04/19 182 8807 -211221 24/04/23 183 8807 -211332 24/04/18 182 8808 -211331 24/04/20 183 8808 -211208 24/04/18 182 8809 -211207 24/04/21 183 8809 -211312 24/04/19 182 8810 -211311 24/04/19 183 8810 -211404 
24/04/21 182 8811 -211403 24/04/22 183 8811 -211338 24/04/22 182 8812 -211337 24/04/21 183 8812 -211598 24/04/23 182 8813 -211597 24/04/24 183 8813 -211726 24/04/22 182 8814 -211725 24/04/22 183 8814 -211538 24/04/20 182 8815 -211537 24/04/21 183 8815 -211674 24/04/20 182 8816 -211673 24/04/18 183 8816 -211854 24/04/21 182 8817 -211853 24/04/23 183 8817 -211842 24/04/22 182 8818 -211841 24/04/20 183 8818 -211368 24/04/22 182 8819 -211367 24/04/24 183 8819 -211216 24/04/21 182 8820 -211215 24/04/24 183 8820 -211152 24/04/19 182 8821 -211151 24/04/22 183 8821 -211126 24/04/20 182 8822 -211125 24/04/22 183 8822 -211720 24/04/19 182 8823 -211719 24/04/19 183 8823 -211812 24/04/18 182 8824 -211811 24/04/21 183 8824 -211836 24/04/22 182 8825 -211835 24/04/22 183 8825 -211260 24/04/19 182 8826 -211259 24/04/20 183 8826 -211642 24/04/20 182 8827 -211641 24/04/22 183 8827 -211846 24/04/17 182 8828 -211845 24/04/18 183 8828 -211502 24/04/21 182 8829 -211501 24/04/21 183 8829 -211286 24/04/20 182 8830 -211285 24/04/20 183 8830 -211788 24/04/22 182 8831 -211787 24/04/18 183 8831 -211122 24/04/23 182 8832 -211121 24/04/21 183 8832 -211604 24/04/19 182 8833 -211603 24/04/23 183 8833 -211550 24/04/22 182 8834 -211549 24/04/20 183 8834 -211590 24/04/20 182 8835 -211589 24/04/21 183 8835 -211866 24/04/21 182 8836 -211865 24/04/19 183 8836 -211668 24/04/21 182 8837 -211667 24/04/22 183 8837 -211200 24/04/21 182 8838 -211199 24/04/22 183 8838 -211790 24/04/22 182 8839 -211789 24/04/21 183 8839 -211504 24/04/19 182 8840 -211503 24/04/18 183 8840 -211700 24/04/21 182 8841 -211699 24/04/19 183 8841 -211600 24/04/21 182 8842 -211599 24/04/21 183 8842 -211806 24/04/22 182 8843 -211805 24/04/20 183 8843 -211292 24/04/21 182 8844 -211291 24/04/19 183 8844 -211676 24/04/18 182 8845 -211675 24/04/24 183 8845 -211696 24/04/20 182 8846 -211695 24/04/21 183 8846 -211470 24/04/22 182 8847 -211469 24/04/23 183 8847 -211296 24/04/21 182 8848 -211295 24/04/21 183 8848 -211326 24/04/20 182 8849 
-211325 24/04/19 183 8849 -211170 24/04/19 182 8850 -211169 24/04/22 183 8850 -211278 24/04/17 182 8851 -211277 24/04/20 183 8851 -211814 24/04/18 182 8852 -211813 24/04/23 183 8852 -211592 24/04/22 182 8853 -211591 24/04/21 183 8853 -211358 24/04/23 182 8854 -211357 24/04/23 183 8854 -211166 24/04/19 182 8855 -211165 24/04/23 183 8855 -211174 24/04/20 182 8856 -211173 24/04/18 183 8856 -211518 24/04/21 182 8857 -211517 24/04/23 183 8857 -211196 24/04/19 182 8858 -211195 24/04/24 183 8858 -211198 24/04/21 182 8859 -211197 24/04/23 183 8859 -211230 24/04/17 182 8860 -211229 24/04/20 183 8860 -211258 24/04/18 182 8861 -211257 24/04/22 183 8861 -211858 24/04/19 182 8862 -211857 24/04/22 183 8862 -211616 24/04/18 182 8863 -211615 24/04/20 183 8863 -211472 24/04/21 182 8864 -211471 24/04/23 183 8864 -211820 24/04/17 182 8865 -211819 24/04/19 183 8865 -211192 24/04/19 182 8866 -211191 24/04/21 183 8866 -211750 24/04/21 182 8867 -211749 24/04/21 183 8867 -211666 24/04/18 182 8868 -211665 24/04/20 183 8868 -211450 24/04/19 182 8869 -211449 24/04/23 183 8869 -211250 24/04/17 182 8870 -211249 24/04/24 183 8870 -211652 24/04/20 182 8871 -211651 24/04/19 183 8871 -211188 24/04/21 182 8872 -211187 24/04/23 183 8872 -211740 24/04/20 182 8873 -211739 24/04/22 183 8873 -211176 24/04/23 182 8874 -211175 24/04/20 183 8874 -211804 24/04/20 182 8875 -211803 24/04/21 183 8875 -211464 24/04/22 182 8876 -211463 24/04/18 183 8876 -211808 24/04/21 182 8877 -211807 24/04/20 183 8877 -211456 23/06/04 182 8878 -211455 23/06/15 183 8878 -211228 23/06/12 182 8879 -211227 23/06/01 183 8879 -211614 23/06/01 182 8880 -211613 23/06/15 183 8880 -211370 23/05/30 182 8881 -211369 23/06/07 183 8881 -211526 23/05/31 182 8882 -211525 23/06/10 183 8882 -211466 23/06/02 182 8883 -211465 23/05/27 183 8883 -211354 23/06/07 182 8884 -211353 23/05/28 183 8884 -211458 23/05/30 182 8885 -211457 23/05/26 183 8885 -211422 23/05/31 182 8886 -211421 23/06/06 183 8886 -211220 23/06/03 182 8887 -211219 23/05/28 183 
8887 -211272 23/06/11 182 8888 -211271 23/06/09 183 8888 -211856 23/06/13 182 8889 -211855 23/06/05 183 8889 -211248 23/06/08 182 8890 -211247 23/06/12 183 8890 -211386 23/06/10 182 8891 -211385 23/06/03 183 8891 -211382 23/05/29 182 8892 -211381 23/06/03 183 8892 -211276 23/06/05 182 8893 -211275 23/05/30 183 8893 -211444 23/06/01 182 8894 -211443 23/05/28 183 8894 -211552 23/05/27 182 8895 -211551 23/05/26 183 8895 -211238 23/05/26 182 8896 -211237 23/05/30 183 8896 -211244 23/05/29 182 8897 -211243 23/06/14 183 8897 -211690 23/05/26 182 8898 -211689 23/06/05 183 8898 -211704 23/05/26 182 8899 -211703 23/06/10 183 8899 -211214 23/06/08 182 8900 -211213 23/05/28 183 8900 -211754 23/05/29 182 8901 -211753 23/06/07 183 8901 -211414 23/06/13 182 8902 -211413 23/06/06 183 8902 -211868 23/05/27 182 8903 -211867 23/06/03 183 8903 -211510 23/05/27 182 8904 -211509 23/06/04 183 8904 -211254 23/06/10 182 8905 -211253 23/06/03 183 8905 -211630 23/06/01 182 8906 -211629 23/06/04 183 8906 -211850 23/06/02 182 8907 -211849 23/05/29 183 8907 -211146 23/05/30 182 8908 -211145 23/06/08 183 8908 -211348 23/06/01 182 8909 -211347 23/06/15 183 8909 -211826 23/06/04 182 8910 -211825 23/06/10 183 8910 -211580 23/06/03 182 8911 -211579 23/06/01 183 8911 -211210 23/06/01 182 8912 -211209 23/06/04 183 8912 -211824 23/06/12 182 8913 -211823 23/06/04 183 8913 -211838 23/05/31 182 8914 -211837 23/06/04 183 8914 -211402 23/06/01 182 8915 -211401 23/06/08 183 8915 -211782 23/06/14 182 8916 -211781 23/06/09 183 8916 -211586 23/06/14 182 8917 -211585 23/05/30 183 8917 -211516 23/06/14 182 8918 -211515 23/06/04 183 8918 -211692 23/06/06 182 8919 -211691 23/06/07 183 8919 -211206 23/05/30 182 8920 -211205 23/06/04 183 8920 -211290 23/06/02 182 8921 -211289 23/06/05 183 8921 -211280 23/06/12 182 8922 -211279 23/05/26 183 8922 -211560 23/06/11 182 8923 -211559 23/06/06 183 8923 -211300 23/06/04 182 8924 -211299 23/06/11 183 8924 -211540 23/06/12 182 8925 -211539 23/06/05 183 8925 -211626 23/05/25 
182 8926 -211625 23/06/06 183 8926 -211418 23/06/03 182 8927 -211417 23/05/26 183 8927 -211672 23/05/31 182 8928 -211671 23/05/30 183 8928 -211546 23/05/30 182 8929 -211545 23/06/05 183 8929 -211242 23/06/04 182 8930 -211241 23/05/31 183 8930 -211656 23/05/28 182 8931 -211655 23/05/29 183 8931 -211716 23/06/08 182 8932 -211715 23/06/04 183 8932 -211848 23/05/31 182 8933 -211847 23/05/31 183 8933 -211628 23/06/11 182 8934 -211627 23/06/10 183 8934 -211356 23/05/27 182 8935 -211355 23/06/13 183 8935 -211844 23/06/09 182 8936 -211843 23/05/31 183 8936 -211268 23/05/28 182 8937 -211267 23/06/02 183 8937 -211762 23/05/25 182 8938 -211761 23/05/28 183 8938 -211816 23/05/28 182 8939 -211815 23/05/29 183 8939 -211602 23/05/30 182 8940 -211601 23/06/12 183 8940 -211710 23/05/25 182 8941 -211709 23/05/28 183 8941 -211612 23/06/10 182 8942 -211611 23/05/31 183 8942 -211202 23/05/31 182 8943 -211201 23/05/30 183 8943 -211390 23/05/31 182 8944 -211389 23/06/01 183 8944 -211396 23/05/29 182 8945 -211395 23/06/03 183 8945 -211344 23/05/29 182 8946 -211343 23/06/08 183 8946 -211508 23/05/25 182 8947 -211507 23/06/13 183 8947 -211160 23/06/04 182 8948 -211159 23/06/08 183 8948 -211742 23/06/12 182 8949 -211741 23/06/04 183 8949 -211462 23/05/29 182 8950 -211461 23/06/10 183 8950 -211434 23/06/09 182 8951 -211433 23/06/06 183 8951 -211442 23/06/03 182 8952 -211441 23/05/27 183 8952 -211406 23/06/07 182 8953 -211405 23/06/11 183 8953 -211468 23/05/28 182 8954 -211467 23/05/26 183 8954 -211378 23/06/09 182 8955 -211377 23/06/14 183 8955 -211684 23/06/06 182 8956 -211683 23/06/08 183 8956 -211618 23/06/03 182 8957 -211617 23/06/01 183 8957 -211724 23/05/28 182 8958 -211723 23/06/03 183 8958 -211274 23/06/13 182 8959 -211273 23/05/28 183 8959 -211708 23/05/31 182 8960 -211707 23/05/30 183 8960 -211184 23/06/01 182 8961 -211183 23/06/06 183 8961 -211262 23/05/28 182 8962 -211261 23/06/05 183 8962 -211430 23/06/08 182 8963 -211429 23/06/02 183 8963 -211392 23/06/09 182 8964 -211391 
23/06/13 183 8964 -211494 23/06/07 182 8965 -211493 23/05/30 183 8965 -211644 23/06/03 182 8966 -211643 23/06/08 183 8966 -211412 23/05/31 182 8967 -211411 23/06/13 183 8967 -211154 23/05/30 182 8968 -211153 23/06/02 183 8968 -211500 23/06/02 182 8969 -211499 23/05/30 183 8969 -211544 23/06/04 182 8970 -211543 23/06/15 183 8970 -211548 23/06/10 182 8971 -211547 23/06/10 183 8971 -211648 23/06/05 182 8972 -211647 23/05/30 183 8972 -211334 23/05/27 182 8973 -211333 23/06/13 183 8973 -211862 23/06/01 182 8974 -211861 23/06/02 183 8974 -211350 23/05/27 182 8975 -211349 23/06/06 183 8975 -211800 23/06/02 182 8976 -211799 23/05/29 183 8976 -211256 23/05/25 182 8977 -211255 23/05/28 183 8977 -211792 23/05/30 182 8978 -211791 23/06/12 183 8978 -211530 23/06/02 182 8979 -211529 23/06/05 183 8979 -211680 23/06/02 182 8980 -211679 23/06/09 183 8980 -211362 23/05/26 182 8981 -211361 23/06/10 183 8981 -211632 23/06/09 182 8982 -211631 23/05/26 183 8982 -211670 23/05/27 182 8983 -211669 23/05/26 183 8983 -211756 23/05/29 182 8984 -211755 23/05/31 183 8984 -211522 23/06/10 182 8985 -211521 23/06/13 183 8985 -211388 23/06/02 182 8986 -211387 23/06/01 183 8986 -211320 23/06/14 182 8987 -211319 23/06/12 183 8987 -211144 23/06/11 182 8988 -211143 23/06/12 183 8988 -211728 23/06/04 182 8989 -211727 23/06/15 183 8989 -211746 23/06/13 182 8990 -211745 23/05/31 183 8990 -211566 23/05/25 182 8991 -211565 23/05/31 183 8991 -211322 23/06/01 182 8992 -211321 23/06/08 183 8992 -211624 23/05/27 182 8993 -211623 23/06/13 183 8993 -211706 23/06/04 182 8994 -211705 23/06/01 183 8994 -211138 23/06/13 182 8995 -211137 23/05/29 183 8995 -211562 23/06/13 182 8996 -211561 23/05/26 183 8996 -211702 23/06/07 182 8997 -211701 23/05/29 183 8997 -211694 23/06/11 182 8998 -211693 23/06/13 183 8998 -211584 23/05/31 182 8999 -211583 23/05/29 183 8999 -211822 23/05/27 182 9000 -211821 23/06/10 183 9000 -211778 23/05/29 182 9001 -211777 23/06/11 183 9001 -211240 23/06/08 182 9002 -211239 23/06/11 183 9002 
-211556 23/05/26 182 9003 -211555 23/06/06 183 9003 -211736 23/05/30 182 9004 -211735 23/05/27 183 9004 -211714 23/05/28 182 9005 -211713 23/05/26 183 9005 -211128 23/06/10 182 9006 -211127 23/05/29 183 9006 -211654 23/06/11 182 9007 -211653 23/05/26 183 9007 -211606 23/05/29 182 9008 -211605 23/06/01 183 9008 -211774 23/05/25 182 9009 -211773 23/06/05 183 9009 -211574 23/05/30 182 9010 -211573 23/05/26 183 9010 -211132 23/05/25 182 9011 -211131 23/05/26 183 9011 -211162 23/06/02 182 9012 -211161 23/06/13 183 9012 -211438 23/06/12 182 9013 -211437 23/06/14 183 9013 -211646 23/06/14 182 9014 -211645 23/06/09 183 9014 -211744 23/06/13 182 9015 -211743 23/05/26 183 9015 -211232 23/06/05 182 9016 -211231 23/05/28 183 9016 -211306 23/06/02 182 9017 -211305 23/05/28 183 9017 -211218 23/06/09 182 9018 -211217 23/06/04 183 9018 -211524 23/05/29 182 9019 -211523 23/06/08 183 9019 -211416 23/05/27 182 9020 -211415 23/06/12 183 9020 -211678 23/06/10 182 9021 -211677 23/06/08 183 9021 -211520 23/06/12 182 9022 -211519 23/05/28 183 9022 -211818 23/06/13 182 9023 -211817 23/05/29 183 9023 -211448 23/05/26 182 9024 -211447 23/06/12 183 9024 -211638 23/05/29 182 9025 -211637 23/06/09 183 9025 -211768 23/06/08 182 9026 -211767 23/06/11 183 9026 -211664 23/05/29 182 9027 -211663 23/06/02 183 9027 -211428 23/05/31 182 9028 -211427 23/06/12 183 9028 -211142 23/06/04 182 9029 -211141 23/06/10 183 9029 -211578 23/05/29 182 9030 -211577 23/06/09 183 9030 -211498 23/06/06 182 9031 -211497 23/06/06 183 9031 diff --git a/src/tests/databases/ensembl_genome_metadata/dataset_source.txt b/src/tests/databases/ensembl_genome_metadata/dataset_source.txt deleted file mode 100644 index 50b12532..00000000 --- a/src/tests/databases/ensembl_genome_metadata/dataset_source.txt +++ /dev/null @@ -1,53 +0,0 @@ -1 core bacteria_0_collection_core_57_110_1 \N -4 core homo_sapiens_gca018473315v1_core_110_1 \N -5 core homo_sapiens_gca018469415v1_core_110_1 \N -6 core homo_sapiens_gca018469875v1_core_110_1 \N -7 
core homo_sapiens_gca018505825v1_core_110_1 \N -9 core homo_sapiens_gca018852615v1_core_110_1 \N -14 core plasmodium_falciparum_core_57_110_1 \N -18 core homo_sapiens_gca021950905v1_core_110_1 \N -40 core homo_sapiens_37_core_110_37 \N -79 core triticum_aestivum_core_57_110_4 \N -92 core homo_sapiens_core_110_38 \N -97 core homo_sapiens_gca018505865v1_core_110_1 \N -100 core homo_sapiens_gca018852605v1_core_110_1 \N -107 core homo_sapiens_gca018469925v1_core_110_1 \N -135 core homo_sapiens_gca018469425v1_core_110_1 \N -179 core homo_sapiens_gca021951015v1_core_110_1 \N -185 core homo_sapiens_gca018473295v1_core_110_1 \N -214 core saccharomyces_cerevisiae_core_57_110_4 \N -217 core caenorhabditis_elegans_core_57_110_282 \N -251 compara homo_sapiens_gca018473315v1_compara_110 \N -266 compara homo_sapiens_37_compara_110 \N -284 compara homo_sapiens_gca018469425v1_compara_110 \N -294 compara homo_sapiens_gca018469925v1_compara_110 \N -313 compara homo_sapiens_gca018505825v1_compara_110 \N -323 compara plasmodium_falciparum_compara_110 \N -332 compara saccharomyces_cerevisiae_compara_110 \N -359 compara triticum_aestivum_compara_110 \N -368 compara homo_sapiens_gca018852605v1_compara_110 \N -369 compara caenorhabditis_elegans_compara_110 \N -383 compara homo_sapiens_gca018469415v1_compara_110 \N -387 compara homo_sapiens_gca018469875v1_compara_110 \N -423 compara homo_sapiens_gca018505865v1_compara_110 \N -424 compara homo_sapiens_compara_110 \N -429 compara homo_sapiens_gca021950905v1_compara_110 \N -433 compara escherichia_coli_str_k_12_substr_mg1655_gca_000005845_compara_110 \N -443 compara homo_sapiens_gca018852615v1_compara_110 \N -457 compara homo_sapiens_gca018473295v1_compara_110 \N -469 compara homo_sapiens_gca021951015v1_compara_110 \N -565 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/114e90e0-aa35-4af6-9204-267c988328c3/variation.vcf.gz \N -571 vcf 
/nfs/production/flicek/ensembl/variation/new_website/v110/api/ef282def-9a17-4b35-a344-0f0c559e54ab/variation.vcf.gz \N -576 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/9caa2cae-d1c8-4cfc-9ffd-2e13bc3e95b1/variation.vcf.gz \N -592 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/75b7ac15-6373-4ad5-9fb7-23813a5355a4/variation.vcf.gz \N -595 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/1220d766-6fcb-4b80-9106-121f238c0b3d/variation.vcf.gz \N -597 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/8364a820-5485-42d7-a648-1a5eeb858319/variation.vcf.gz \N -608 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/2020e8d5-4d87-47af-be78-0b15e48970a7/variation.vcf.gz \N -610 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/65d4f21f-695a-4ed0-be67-5732a551fea4/variation.vcf.gz \N -644 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a733574a-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz \N -653 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a73356e1-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz \N -660 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a73357ab-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz \N -670 regulation /nfs/production/flicek/ensembl/regulation/plins/110-mvp-handover/homo_sapiens/GRCh38/homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.bb \N -673 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/a7335667-93e7-11ec-a39d-005056b38ce3/variation.vcf.gz \N -674 vcf /nfs/production/flicek/ensembl/variation/new_website/v110/api/3704ceb1-948d-11ec-a39d-005056b38ce3/variation.vcf.gz \N -4352 compara escherichia_coli_str_k_12_substr_mg1655_compara_110 \N diff --git a/src/tests/databases/ensembl_genome_metadata/dataset_type.txt b/src/tests/databases/ensembl_genome_metadata/dataset_type.txt deleted file mode 100644 index 58817bb7..00000000 --- 
a/src/tests/databases/ensembl_genome_metadata/dataset_type.txt +++ /dev/null @@ -1,34 +0,0 @@ -1 assembly Genomic assembly assembly Compilation of sequences for a genome \N -2 genebuild Genomic Build genebuild_annotation Genomic annotations for an assembly \N -3 variation Variation Annotations variation_annotation Variation annotation set \N -4 evidence Variation Evidence production_process Variation evidence annotation 3 -5 short_variant Short variants production_process Short variant data 3 -6 homologies Comparative homologies compara_annotation Comparative Genomics annotation \N -7 regulatory_features Regulatory Annotation regulation_annotation Regulatory annotation for an assembly \N -8 genebuild_compute External References production_process Xref genome annotation for Genebuild 2 -9 genebuild_files Files dumps production_process File Dumps, either internal or for public consumption 2 -11 genebuild_web Web Geneset content production_process Web Geneset related content 2 -12 genebuild_prep Genebuild preparation production_preparation Web Content for Geneset publication 2 -13 xrefs External References production_process External annotations linking 8 -14 protein_features Protein Features annotations production_process Proteins annotation 8 -15 alpha_fold AlphaFold computation production_process Compute Protein structure with Alphafold 8 -16 blast Blast tools production_process Blast Indexes files 9 -17 ftp_dumps Public FTP files production_process Public FTP flat files geneset dumps 9 -18 thoas_dumps Thoas load flat files production_process Dump flat file to load onto THOAS 11 -19 thoas_load Thoas MongoDB Load production_preparation Load dumped files onto THOAS 12 -20 genebuild_browser_files Genome Browser BB Geneset files production_process Production BigBed for Genome Browser 11 -21 genebuild_track Geneset Tracks API production_preparation Register Geneset Track API BigBed files 12 -23 checksums Sequences Checksums production_process Compute core sequence 
checksums and update metadata 11 -24 refget_load Refget Loading production_preparation Load sequences and their checksum onto Refget app 12 -25 homology_compute Homology annotation production_process Compute Genome homology analysis 6 -26 homology_load Homology dataload production_preparation Load homology data onto Compara Service (MongoDB) 6 -27 homology_ftp Homology tsv public files production_preparation Dump and sync public TSV homology files 6 -28 vep VEP filesets variation_annotation VCF annotation file for geneset \N -29 variation_ftp Public Variation files (vcf) production_preparation VCF files for public FTP 3 -31 variation_browser_files Variation Browser files production_process Variation track browser file 3 -32 variation_track Variation Track production_preparation Variation Track API 3 -33 regulation_browser_files Regulation Browser files production_process Regulation track browser file 7 -34 regulation_track Regulation Track production_preparation Regulation Track API 7 -35 regulation_ftp Regulation Public files production_preparation Regulation public files 7 -37 web_genesearch GeneSearch Index production_publication Gene search indexes provisioning \N -38 web_genomediscovery Genome Search indexes loading to EBI search production_publication Load dumped data from genebuild_web onto EBI Search engine (SpecieSelector) \N diff --git a/src/tests/databases/ensembl_genome_metadata/ensembl_release.txt b/src/tests/databases/ensembl_genome_metadata/ensembl_release.txt deleted file mode 100644 index 67107deb..00000000 --- a/src/tests/databases/ensembl_genome_metadata/ensembl_release.txt +++ /dev/null @@ -1,6 +0,0 @@ -1 110.1 2020-10-18 2020-10-18 1 partial 1 Released 1 -2 110.2 2021-10-18 2021-10-18 0 partial 1 Prepared 2 -3 110.3 2022-10-18 2022-10-18 0 partial 1 Preparing 3 -4 112.0 2022-11-18 2022-11-18 0 partial 1 Planned 4 -5 108.0 2023-06-15 2023-06-15 0 partial 1 Released 5 -6 114.0 2025-06-15 2025-06-15 0 partial 1 Preparing 6 diff --git 
a/src/tests/databases/ensembl_genome_metadata/ensembl_site.txt b/src/tests/databases/ensembl_genome_metadata/ensembl_site.txt deleted file mode 100644 index 6ef04cbf..00000000 --- a/src/tests/databases/ensembl_genome_metadata/ensembl_site.txt +++ /dev/null @@ -1 +0,0 @@ -1 Ensembl MVP Ensembl https://beta.ensembl.org diff --git a/src/tests/databases/ensembl_genome_metadata/genome.txt b/src/tests/databases/ensembl_genome_metadata/genome.txt deleted file mode 100644 index 86e09709..00000000 --- a/src/tests/databases/ensembl_genome_metadata/genome.txt +++ /dev/null @@ -1,20 +0,0 @@ -1 a73351f7-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:01:44 1 1 escherichia_coli_str_k_12_substr_mg1655 EXT01 2018-09 community 0 \N asm584v2 -4 9caa2cae-d1c8-4cfc-9ffd-2e13bc3e95b1 2023-09-22 15:02:00 4 4 homo_sapiens_gca018473315v1 ENS01 2022-08 ensembl 0 \N \N -5 2020e8d5-4d87-47af-be78-0b15e48970a7 2023-09-22 15:02:01 5 5 homo_sapiens_gca018469415v1 ENS01 2022-07 ensembl 0 \N \N -6 75b7ac15-6373-4ad5-9fb7-23813a5355a4 2023-09-22 15:02:02 6 6 homo_sapiens_gca018469875v1 ENS01 2022-07 ensembl 0 \N \N -7 1220d766-6fcb-4b80-9106-121f238c0b3d 2023-09-22 15:02:04 7 7 homo_sapiens_gca018505825v1 ENS01 2022-07 ensembl 0 \N \N -9 c3dcaca8-aaee-479f-aad8-c7a5e17b7e10 2023-09-22 15:02:11 9 9 homo_sapiens_gca018852615v1 ENS01 2022-07 ensembl 0 \N \N -12 a73356e1-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:03:01 15 14 plasmodium_falciparum EXT01 2017-10 community 0 \N asm276v2 -19 56d9b469-097f-48a7-8501-c8416bcbcdfb 2023-09-22 15:03:02 18 9 homo_sapiens_gca021950905v1 ENS01 2022-07 ensembl 0 \N \N -31 3704ceb1-948d-11ec-a39d-005056b38ce3 2023-09-22 15:03:22 40 83 homo_sapiens_37 GENCODE19 2013-09 ensembl 0 \N grch37 -74 a73357ab-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:04:29 79 72 triticum_aestivum EXT01 2018-04 community 0 \N iwgsc -86 a7335667-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:04:45 92 83 homo_sapiens GENCODE44 2023-03 ensembl 0 \N grch38 -89 8364a820-5485-42d7-a648-1a5eeb858319 2023-09-22 
15:04:50 97 7 homo_sapiens_gca018505865v1 ENS01 2022-07 ensembl 0 \N \N -92 63b4ffbf-0147-4aa7-b0af-7575bb822740 2023-09-22 15:04:53 100 9 homo_sapiens_gca018852605v1 ENS01 2022-07 ensembl 0 \N \N -99 ef282def-9a17-4b35-a344-0f0c559e54ab 2023-09-22 15:04:56 107 6 homo_sapiens_gca018469925v1 ENS01 2022-07 ensembl 0 \N \N -125 114e90e0-aa35-4af6-9204-267c988328c3 2023-09-22 15:05:37 135 5 homo_sapiens_gca018469425v1 ENS01 2022-07 ensembl 0 \N \N -169 af073c3e-d087-46b0-bb62-310e89982450 2023-09-22 15:06:39 180 9 homo_sapiens_gca021951015v1 ENS01 2022-07 ensembl 0 \N \N -174 65d4f21f-695a-4ed0-be67-5732a551fea4 2023-09-22 15:06:43 186 4 homo_sapiens_gca018473295v1 ENS01 2022-08 ensembl 0 \N \N -201 a733574a-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:06:55 216 172 saccharomyces_cerevisiae EXT01 2018-10 community 0 \N r64-1-1 -203 a733550b-93e7-11ec-a39d-005056b38ce3 2023-09-22 15:06:58 219 175 caenorhabditis_elegans EXT01 2014-10 wormbase 0 \N wbcel235 -204 99999999-695a-4ed0-be67-5732a551fea4 2023-09-22 15:06:43 186 4 homo_sapiens_gca018473295v1 ENS09 2022-08 ensembl 0 \N \N diff --git a/src/tests/databases/ensembl_genome_metadata/genome_dataset.txt b/src/tests/databases/ensembl_genome_metadata/genome_dataset.txt deleted file mode 100644 index d0e4e0fb..00000000 --- a/src/tests/databases/ensembl_genome_metadata/genome_dataset.txt +++ /dev/null @@ -1,499 +0,0 @@ -1 1 1 1 5 -2 1 2 1 5 -7 1 7 4 1 -9 1 9 5 1 -11 1 11 6 2 -13 1 13 7 2 -14 1 14 7 2 -17 1 17 9 3 -23 1 23 12 5 -24 1 24 12 5 -37 0 37 19 \N -38 0 38 19 \N -61 1 61 31 5 -62 1 62 31 5 -147 1 147 74 5 -148 1 148 74 5 -171 1 171 86 5 -172 1 172 86 5 -177 1 177 89 3 -178 1 178 89 3 -183 0 183 92 \N -184 0 184 92 \N -197 1 197 99 2 -249 1 249 125 2 -250 1 250 125 2 -337 0 337 169 \N -338 0 338 169 \N -347 1 347 174 1 -348 1 348 174 1 -401 1 401 201 5 -402 1 402 201 5 -405 1 405 203 5 -406 1 406 203 5 -887 1 888 4 1 -888 1 890 5 1 -889 1 892 6 2 -891 1 896 9 3 -956 1 1006 99 2 -1332 1 1391 86 5 -1333 1 1392 31 5 -1334 
1 1393 86 5 -1335 1 1394 31 5 -1346 1 1405 201 5 -1355 1 1414 12 5 -1362 1 1421 74 5 -1405 1 1464 125 2 -1411 1 1470 99 2 -1416 1 1475 4 1 -1432 1 1491 6 2 -1435 1 1494 7 2 -1437 1 1496 89 3 -1448 1 1507 5 1 -1450 1 1509 174 1 -1469 1 1528 201 5 -1478 1 1537 12 5 -1485 1 1544 74 5 -2217 1 2276 4 1 -2232 1 2291 31 5 -2260 1 2319 99 2 -2289 1 2348 12 5 -2298 1 2357 201 5 -2325 1 2384 74 5 -2335 1 2394 203 5 -2349 1 2408 5 1 -2390 1 2449 86 5 -2395 0 2454 19 \N -2399 1 2458 1 5 -2423 1 2482 174 1 -2435 0 2494 169 \N -2459 1 2518 86 5 -6538 0 6593 174 2 -6568 0 6623 31 2 -6644 0 6699 12 2 -6794 1 6849 89 \N -6841 1 6896 6 2 -7014 0 7069 203 2 -7122 1 7177 86 2 -7265 1 7320 125 2 -7480 0 7535 5 2 -7548 0 7603 74 2 -7730 1 7785 7 2 -7765 0 7820 4 2 -7792 0 7847 201 2 -8075 0 8130 92 \N -8337 0 8392 1 2 -8606 1 8661 9 \N -8607 0 8662 174 1 -8608 0 8663 174 1 -8609 0 8664 174 1 -8610 0 8665 174 1 -8611 0 8666 174 1 -8612 0 8667 174 1 -8613 0 8668 174 1 -8614 0 8669 174 1 -8615 0 8670 174 1 -8616 0 8671 174 1 -8617 0 8672 174 1 -8618 0 8673 174 1 -8619 0 8674 174 1 -8620 0 8675 174 1 -8621 0 8676 174 1 -8622 0 8677 4 1 -8623 0 8678 4 1 -8624 0 8679 4 1 -8625 0 8680 4 1 -8626 0 8681 4 1 -8627 0 8682 4 1 -8628 0 8683 4 1 -8629 0 8684 4 1 -8630 0 8685 4 1 -8631 0 8686 4 1 -8632 0 8687 4 1 -8633 0 8688 4 1 -8634 0 8689 4 1 -8635 0 8690 4 1 -8636 0 8691 4 1 -8637 0 8692 5 1 -8638 0 8693 5 1 -8639 0 8694 5 1 -8640 0 8695 5 1 -8641 0 8696 5 1 -8642 0 8697 5 1 -8643 0 8698 5 1 -8644 0 8699 5 1 -8645 0 8700 5 1 -8646 0 8701 5 1 -8647 0 8702 5 1 -8648 0 8703 5 1 -8649 0 8704 5 1 -8650 0 8705 5 1 -8651 0 8706 5 1 -8652 0 8707 4 1 -8653 0 8708 4 1 -8654 0 8709 4 1 -8655 0 8710 4 1 -8656 0 8711 4 1 -8657 0 8712 5 1 -8658 0 8713 5 1 -8659 0 8714 5 1 -8660 0 8715 5 1 -8661 0 8716 5 1 -8662 0 8717 174 1 -8663 0 8718 174 1 -8664 0 8719 174 1 -8665 0 8720 174 1 -8666 0 8721 174 1 -8667 0 8722 4 1 -8668 0 8723 4 1 -8669 0 8724 4 1 -8670 0 8725 5 1 -8671 0 8726 5 1 -8672 0 8727 5 1 -8673 0 
8728 174 1 -8674 0 8729 174 1 -8675 0 8730 174 1 -8676 0 8731 7 2 -8677 0 8732 7 2 -8678 0 8733 7 2 -8679 0 8734 7 2 -8680 0 8735 7 2 -8681 0 8736 7 2 -8682 0 8737 7 2 -8683 0 8738 7 2 -8684 0 8739 7 2 -8685 0 8740 7 2 -8686 0 8741 7 2 -8687 0 8742 7 2 -8688 0 8743 7 2 -8689 0 8744 7 2 -8690 0 8745 7 2 -8691 0 8746 125 2 -8692 0 8747 125 2 -8693 0 8748 125 2 -8694 0 8749 125 2 -8695 0 8750 125 2 -8696 0 8751 125 2 -8697 0 8752 125 2 -8698 0 8753 125 2 -8699 0 8754 125 2 -8700 0 8755 125 2 -8701 0 8756 125 2 -8702 0 8757 125 2 -8703 0 8758 125 2 -8704 0 8759 125 2 -8705 0 8760 125 2 -8706 0 8761 6 2 -8707 0 8762 6 2 -8708 0 8763 6 2 -8709 0 8764 6 2 -8710 0 8765 6 2 -8711 0 8766 6 2 -8712 0 8767 6 2 -8713 0 8768 6 2 -8714 0 8769 6 2 -8715 0 8770 6 2 -8716 0 8771 6 2 -8717 0 8772 6 2 -8718 0 8773 6 2 -8719 0 8774 6 2 -8720 0 8775 6 2 -8721 0 8776 99 2 -8722 0 8777 99 2 -8723 0 8778 99 2 -8724 0 8779 99 2 -8725 0 8780 99 2 -8726 0 8781 99 2 -8727 0 8782 99 2 -8728 0 8783 99 2 -8729 0 8784 99 2 -8730 0 8785 99 2 -8731 0 8786 99 2 -8732 0 8787 99 2 -8733 0 8788 99 2 -8734 0 8789 99 2 -8735 0 8790 99 2 -8736 0 8791 125 2 -8737 0 8792 125 2 -8738 0 8793 125 2 -8739 0 8794 125 2 -8740 0 8795 125 2 -8741 0 8796 99 2 -8742 0 8797 99 2 -8743 0 8798 99 2 -8744 0 8799 99 2 -8745 0 8800 99 2 -8746 0 8801 6 2 -8747 0 8802 6 2 -8748 0 8803 6 2 -8749 0 8804 6 2 -8750 0 8805 6 2 -8751 0 8806 7 2 -8752 0 8807 7 2 -8753 0 8808 7 2 -8754 0 8809 7 2 -8755 0 8810 7 2 -8756 0 8811 99 2 -8757 0 8812 99 2 -8758 0 8813 99 2 -8759 0 8814 174 2 -8760 0 8815 174 2 -8761 0 8816 174 2 -8762 0 8817 31 2 -8763 0 8818 31 2 -8764 0 8819 31 2 -8765 0 8820 12 2 -8766 0 8821 12 2 -8767 0 8822 12 2 -8768 0 8823 6 2 -8769 0 8824 6 2 -8770 0 8825 6 2 -8771 0 8826 203 2 -8772 0 8827 203 2 -8773 0 8828 203 2 -8774 0 8829 86 2 -8775 0 8830 86 2 -8776 0 8831 86 2 -8777 0 8832 125 2 -8778 0 8833 125 2 -8779 0 8834 125 2 -8780 0 8835 5 2 -8781 0 8836 5 2 -8782 0 8837 5 2 -8783 0 8838 74 2 -8784 0 8839 74 2 -8785 
0 8840 74 2 -8786 0 8841 7 2 -8787 0 8842 7 2 -8788 0 8843 7 2 -8789 0 8844 4 2 -8790 0 8845 4 2 -8791 0 8846 4 2 -8792 0 8847 201 2 -8793 0 8848 201 2 -8794 0 8849 201 2 -8795 0 8850 1 2 -8796 0 8851 1 2 -8797 0 8852 1 2 -8798 0 8853 89 3 -8799 0 8854 89 3 -8800 0 8855 89 3 -8801 0 8856 89 3 -8802 0 8857 89 3 -8803 0 8858 89 3 -8804 0 8859 89 3 -8805 0 8860 89 3 -8806 0 8861 89 3 -8807 0 8862 89 3 -8808 0 8863 89 3 -8809 0 8864 9 3 -8810 0 8865 9 3 -8811 0 8866 9 3 -8812 0 8867 9 3 -8813 0 8868 9 3 -8814 0 8869 9 3 -8815 0 8870 9 3 -8816 0 8871 9 3 -8817 0 8872 9 3 -8818 0 8873 9 3 -8819 0 8874 9 3 -8820 0 8875 89 3 -8821 0 8876 89 3 -8822 0 8877 89 3 -8823 0 8878 1 5 -8824 0 8879 1 5 -8825 0 8880 1 5 -8826 0 8881 1 5 -8827 0 8882 1 5 -8828 0 8883 1 5 -8829 0 8884 1 5 -8830 0 8885 1 5 -8831 0 8886 1 5 -8832 0 8887 1 5 -8833 0 8888 1 5 -8834 0 8889 1 5 -8835 0 8890 1 5 -8836 0 8891 1 5 -8837 0 8892 1 5 -8838 0 8893 12 5 -8839 0 8894 12 5 -8840 0 8895 12 5 -8841 0 8896 12 5 -8842 0 8897 12 5 -8843 0 8898 12 5 -8844 0 8899 12 5 -8845 0 8900 12 5 -8846 0 8901 12 5 -8847 0 8902 12 5 -8848 0 8903 12 5 -8849 0 8904 12 5 -8850 0 8905 12 5 -8851 0 8906 12 5 -8852 0 8907 12 5 -8853 0 8908 31 5 -8854 0 8909 31 5 -8855 0 8910 31 5 -8856 0 8911 31 5 -8857 0 8912 31 5 -8858 0 8913 31 5 -8859 0 8914 31 5 -8860 0 8915 31 5 -8861 0 8916 31 5 -8862 0 8917 31 5 -8863 0 8918 31 5 -8864 0 8919 31 5 -8865 0 8920 31 5 -8866 0 8921 31 5 -8867 0 8922 31 5 -8868 0 8923 74 5 -8869 0 8924 74 5 -8870 0 8925 74 5 -8871 0 8926 74 5 -8872 0 8927 74 5 -8873 0 8928 74 5 -8874 0 8929 74 5 -8875 0 8930 74 5 -8876 0 8931 74 5 -8877 0 8932 74 5 -8878 0 8933 74 5 -8879 0 8934 74 5 -8880 0 8935 74 5 -8881 0 8936 74 5 -8882 0 8937 74 5 -8883 0 8938 86 5 -8884 0 8939 86 5 -8885 0 8940 86 5 -8886 0 8941 86 5 -8887 0 8942 86 5 -8888 0 8943 86 5 -8889 0 8944 86 5 -8890 0 8945 86 5 -8891 0 8946 86 5 -8892 0 8947 86 5 -8893 0 8948 86 5 -8894 0 8949 86 5 -8895 0 8950 86 5 -8896 0 8951 86 5 -8897 0 8952 86 5 
-8898 0 8953 201 5 -8899 0 8954 201 5 -8900 0 8955 201 5 -8901 0 8956 201 5 -8902 0 8957 201 5 -8903 0 8958 201 5 -8904 0 8959 201 5 -8905 0 8960 201 5 -8906 0 8961 201 5 -8907 0 8962 201 5 -8908 0 8963 201 5 -8909 0 8964 201 5 -8910 0 8965 201 5 -8911 0 8966 201 5 -8912 0 8967 201 5 -8913 0 8968 203 5 -8914 0 8969 203 5 -8915 0 8970 203 5 -8916 0 8971 203 5 -8917 0 8972 203 5 -8918 0 8973 203 5 -8919 0 8974 203 5 -8920 0 8975 203 5 -8921 0 8976 203 5 -8922 0 8977 203 5 -8923 0 8978 203 5 -8924 0 8979 203 5 -8925 0 8980 203 5 -8926 0 8981 203 5 -8927 0 8982 203 5 -8928 0 8983 86 5 -8929 0 8984 86 5 -8930 0 8985 86 5 -8931 0 8986 86 5 -8932 0 8987 86 5 -8933 0 8988 31 5 -8934 0 8989 31 5 -8935 0 8990 31 5 -8936 0 8991 31 5 -8937 0 8992 31 5 -8938 0 8993 201 5 -8939 0 8994 201 5 -8940 0 8995 201 5 -8941 0 8996 201 5 -8942 0 8997 201 5 -8943 0 8998 12 5 -8944 0 8999 12 5 -8945 0 9000 12 5 -8946 0 9001 12 5 -8947 0 9002 12 5 -8948 0 9003 74 5 -8949 0 9004 74 5 -8950 0 9005 74 5 -8951 0 9006 74 5 -8952 0 9007 74 5 -8953 0 9008 31 5 -8954 0 9009 31 5 -8955 0 9010 31 5 -8956 0 9011 12 5 -8957 0 9012 12 5 -8958 0 9013 12 5 -8959 0 9014 201 5 -8960 0 9015 201 5 -8961 0 9016 201 5 -8962 0 9017 74 5 -8963 0 9018 74 5 -8964 0 9019 74 5 -8965 0 9020 203 5 -8966 0 9021 203 5 -8967 0 9022 203 5 -8968 0 9023 86 5 -8969 0 9024 86 5 -8970 0 9025 86 5 -8971 0 9026 1 5 -8972 0 9027 1 5 -8973 0 9028 1 5 -8974 0 9029 86 5 -8975 0 9030 86 5 -8976 0 9031 86 5 -8977 0 9032 19 \N -8978 0 9033 19 \N -8979 0 9034 19 \N -8980 0 9035 19 \N -8981 0 9036 19 \N -8982 0 9037 19 \N -8983 0 9038 19 \N -8984 0 9039 19 \N -8985 0 9040 19 \N -8986 0 9041 19 \N -8987 0 9042 19 \N -8988 0 9043 92 \N -8989 0 9044 92 \N -8990 0 9045 92 \N -8991 0 9046 92 \N -8992 0 9047 92 \N -8993 0 9048 92 \N -8994 0 9049 92 \N -8995 0 9050 92 \N -8996 0 9051 92 \N -8997 0 9052 92 \N -8998 0 9053 92 \N -8999 0 9054 169 \N -9000 0 9055 169 \N -9001 0 9056 169 \N -9002 0 9057 169 \N -9003 0 9058 169 \N -9004 0 9059 169 \N 
-9005 0 9060 169 \N -9006 0 9061 169 \N -9007 0 9062 169 \N -9008 0 9063 169 \N -9009 0 9064 169 \N -9010 0 9065 19 \N -9011 0 9066 169 \N -9012 0 9067 89 \N -9013 0 9068 92 \N -9014 0 9069 9 \N -9015 0 9070 86 \N -9016 0 9071 86 \N -9017 0 9072 204 6 -9018 0 9073 204 6 -9019 0 9074 204 6 diff --git a/src/tests/databases/ensembl_genome_metadata/genome_release.txt b/src/tests/databases/ensembl_genome_metadata/genome_release.txt deleted file mode 100644 index 217aa3a1..00000000 --- a/src/tests/databases/ensembl_genome_metadata/genome_release.txt +++ /dev/null @@ -1,30 +0,0 @@ -1 1 4 1 -2 1 5 1 -3 1 174 1 -4 1 6 2 -5 1 7 2 -6 1 99 2 -7 1 125 2 -8 1 9 3 -9 1 89 3 -10 1 1 5 -11 1 12 5 -12 1 31 5 -13 1 74 5 -14 1 86 5 -15 1 201 5 -16 1 203 5 -17 0 174 2 -18 0 31 2 -19 0 12 2 -20 0 203 2 -21 0 86 2 -22 0 5 2 -23 0 74 2 -24 0 4 2 -25 0 201 2 -26 0 1 2 -27 0 19 4 -28 0 92 4 -29 0 169 4 -30 0 204 6 diff --git a/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_name.txt b/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_name.txt deleted file mode 100644 index dd6accd1..00000000 --- a/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_name.txt +++ /dev/null @@ -1,469 +0,0 @@ -562 Achromobacter sp. ATCC 35328 includes -562 ATCC 11775 type material -562 Bacillus coli Migula 1895 authority -562 Bacillus coli synonym -562 bacterium 10a includes -562 Bacterium coli commune Escherich 1885 authority -562 Bacterium coli commune synonym -562 Bacterium coli (Migula 1895) Lehmann and Neumann 1896 authority -562 Bacterium coli synonym -562 bacterium E3 includes -562 CCUG 24 type material -562 CCUG 29300 type material -562 CIP 54.8 type material -562 DSM 30083 type material -562 E. coli common name -562 Enterococcus coli synonym -562 Escherichia coli (Migula 1895) Castellani and Chalmers 1919 authority -562 Escherichia coli scientific name -562 Escherichia/Shigella coli equivalent name -562 Escherichia sp. 3_2_53FAA includes -562 Escherichia sp. 
MAR includes -562 IAM 12119 type material -562 JCM 1649 type material -562 LMG 2092 type material -562 LMG:2092 type material -562 NBRC 102203 type material -562 NCCB 54008 type material -562 NCTC 9001 type material -562 strain U5/41 type material -3702 Arabidopsis thaliana (L.) Heynh., 1842 authority -3702 Arabidopsis thaliana scientific name -3702 Arabis thaliana L., 1753 authority -3702 Arabis thaliana synonym -3702 mouse-ear cress common name -3702 thale-cress common name -3702 thale cress genbank common name -3708 Brassica napus L., 1753 authority -3708 Brassica napus scientific name -3708 oilseed rape common name -3708 rape genbank common name -3708 rapeseeds common name -3711 Brassica rapa L., 1753 authority -3711 Brassica rapa scientific name -3711 field mustard genbank common name -3712 Brassica oleracea L., 1753 authority -3712 Brassica oleracea scientific name -3712 wild cabbage genbank common name -3847 Glycine max (L.) Merr., 1917 authority -3847 Glycine max scientific name -3847 Phaseolus max L., 1753 authority -3847 Phaseolus max synonym -3847 soybean genbank common name -3847 soybeans common name -3880 barrel medic genbank common name -3880 Medicago truncatula Gaertn., 1790 authority -3880 Medicago truncatula scientific name -4081 Lycopersicon esculentum Mill. authority -4081 Lycopersicon esculentum synonym -4081 Lycopersicon esculentum var. esculentum synonym -4081 Solanum esculentum Dunal authority -4081 Solanum esculentum synonym -4081 Solanum lycopersicum L., 1753 authority -4081 Solanum lycopersicum scientific name -4081 Solanum lycopersicum var. humboldtii synonym -4081 tomato genbank common name -4113 potatoes common name -4113 potato genbank common name -4113 Solanum tuberosum L., 1753 authority -4113 Solanum tuberosum scientific name -4113 Solanum tuberosum subsp. 
tuberosum includes -4513 barley common name -4513 Hordeum vulgare L., 1753 authority -4513 Hordeum vulgare scientific name -4530 Asian cultivated rice genbank common name -4530 Oryza sativa L., 1753 authority -4530 Oryza sativa scientific name -4530 red rice common name -4530 rice common name -4558 Andropogon sorghum (L.) Brot. authority -4558 Andropogon sorghum synonym -4558 broomcorn common name -4558 milo common name -4558 Sorghum bicolor (L.) Moench, 1794 authority -4558 Sorghum bicolor scientific name -4558 Sorghum bicolor subsp. bicolor synonym -4558 sorghum genbank common name -4558 Sorghum nervosum Besser ex Schult. authority -4558 Sorghum nervosum synonym -4558 Sorghum saccharatum (L.) Moench authority -4558 Sorghum saccharatum synonym -4558 Sorghum vulgare Pers. authority -4558 Sorghum vulgare synonym -4565 bread wheat genbank common name -4565 Canadian hard winter wheat common name -4565 common wheat common name -4565 Triticum aestivum L., 1753 authority -4565 Triticum aestivum scientific name -4565 Triticum aestivum subsp. aestivum synonym -4565 Triticum vulgare synonym -4565 Triticum vulgare Vill., 1787 authority -4565 wheat common name -4567 durum wheat genbank common name -4567 Triticum durum Desf. authority -4567 Triticum durum ssp. durum synonym -4567 Triticum durum subsp. durum synonym -4567 Triticum durum synonym -4567 Triticum rigidum conv. durum synonym -4567 Triticum rigidum ssp. durum synonym -4567 Triticum rigidum var. durum synonym -4567 Triticum turgidum Durum Group synonym -4567 Triticum turgidum subsp. durum (Desf.) Husn., 1899 authority -4567 Triticum turgidum subsp. durum scientific name -4571 cone wheat common name -4571 English wheat common name -4571 poulard wheat common name -4571 rivet wheat common name -4571 Triticum aethiopicum Jakubz., 1947 authority -4571 Triticum aethiopicum synonym -4571 Triticum durum subsp. abyssinicum synonym -4571 Triticum durum subsp. 
abyssinicum Vavilov, 1931 authority -4571 Triticum turgidum L., 1753 authority -4571 Triticum turgidum scientific name -4577 maize common name -4577 Zea mays L., 1753 authority -4577 Zea mays scientific name -4577 Zea mays var. japonica synonym -4932 ATCC 18824 type material -4932 baker's yeast common name -4932 brewer's yeast genbank common name -4932 Candida robusta synonym -4932 CBS 1171 type material -4932 Mycoderma cerevisiae Desm., 1827 authority -4932 Mycoderma cerevisiae synonym -4932 NRRL Y-12632 type material -4932 Saccharomyces capensis synonym -4932 Saccharomyces cerevisiae (Desm.) Meyen, 1838 authority -4932 Saccharomyces cerevisiae scientific name -4932 Saccharomyces cerevisiae 'var. diastaticus' equivalent name -4932 Saccharomyces diastaticus J. Andrews & R.B. Gilliland ex Van der Walt, 1965 authority -4932 Saccharomyces diastaticus synonym -4932 Saccharomyces italicus synonym -4932 Saccharomyces oviformis synonym -4932 Saccharomyces uvarum var. melibiosus synonym -4932 specimen-voucher:NRRL:Y:12632 type material -5833 malaria parasite P. falciparum genbank common name -5833 Plasmodium falciparum scientific name -5833 Plasmodium (Laverania) falciparum synonym -6239 Caenorhabditis elegans (Maupas, 1900) authority -6239 Caenorhabditis elegans scientific name -6239 Rhabditis elegans Maupas, 1900 authority -6239 Rhabditis elegans synonym -7227 Diptera sp. 
DNAS-2A9-224646 includes -7227 Drosophila melanogaster Meigen, 1830 authority -7227 Drosophila melanogaster scientific name -7227 fruit fly genbank common name -7227 Sophophora melanogaster (Meigen, 1830) authority -7227 Sophophora melanogaster synonym -7955 Brachydanio rerio frankei synonym -7955 Brachydanio rerio synonym -7955 Cyprinus rerio Hamilton, 1822 authority -7955 Cyprinus rerio synonym -7955 Danio frankei synonym -7955 Danio rerio frankei synonym -7955 Danio rerio (Hamilton, 1822) authority -7955 Danio rerio scientific name -7955 leopard danio common name -7955 zebra danio common name -7955 zebrafish genbank common name -7955 zebra fish common name -7994 Astyanax mexicanus (De Filippi, 1853) authority -7994 Astyanax mexicanus scientific name -7994 blind cave fish common name -7994 Mexican tetra genbank common name -7994 Tetragonopterus mexicanus De Filippi, 1853 authority -7994 Tetragonopterus mexicanus synonym -8030 Atlantic salmon genbank common name -8030 Salmo salar Linnaeus, 1758 authority -8030 Salmo salar scientific name -8090 Japanese medaka genbank common name -8090 Japanese rice fish common name -8090 medaka common name -8090 Oryzias latipes scientific name -8090 Oryzias latipes (Temminck & Schlegel, 1846) authority -8090 Poecilia latipes synonym -8090 Poecilia latipes Temminck & Schlegel, 1846 authority -8128 Nile tilapia genbank common name -8128 Oreochromis nilotica synonym -8128 Oreochromis niloticus (Linnaeus, 1758) authority -8128 Oreochromis niloticus scientific name -8128 Perca nilotica Linnaeus, 1758 authority -8128 Perca nilotica synonym -8128 Tilapia nilotica synonym -8364 Silurana tropicalis Gray, 1864 authority -8364 Silurana tropicalis synonym -8364 tropical clawed frog genbank common name -8364 western clawed frog common name -8364 Xenopus laevis tropicalis synonym -8364 Xenopus (Silurana) tropicalis synonym -8364 Xenopus tropicalis (Gray, 1864) authority -8364 Xenopus tropicalis scientific name -9031 bantam common name -9031 
chicken genbank common name -9031 chickens common name -9031 dwarf Leghorn chickens includes -9031 Gallus domesticus equivalent name -9031 Gallus gallus domesticus synonym -9031 Gallus gallus scientific name -9031 Phasianus gallus Linnaeus, 1758 authority -9031 Phasianus gallus synonym -9031 red junglefowl includes -9413 greater false vampire bat common name -9413 Indian false vampire genbank common name -9413 Megaderma lyra Saint-Hilaire, 1810 authority -9413 Megaderma lyra scientific name -9544 Cercopithecus mulatta synonym -9544 Cercopithecus mulatta Zimmermann, 1780 authority -9544 Macaca mulatta scientific name -9544 Macaca mulatta (Zimmermann, 1780) authority -9544 rhesus macaque common name -9544 rhesus macaques common name -9544 Rhesus monkey genbank common name -9544 rhesus monkeys common name -9597 bonobo common name -9597 Pan paniscus Schwarz, 1929 authority -9597 Pan paniscus scientific name -9597 pygmy chimpanzee genbank common name -9598 chimpanzee genbank common name -9598 Pan troglodytes scientific name -9598 Simia troglodytes Linnaeus, 1758 authority -9598 Simia troglodytes synonym -9606 Homo sapiens Linnaeus, 1758 authority -9606 Homo sapiens scientific name -9606 human genbank common name -9612 Canis lupus Linnaeus, 1758 authority -9612 Canis lupus scientific name -9612 gray wolf genbank common name -9612 grey wolf common name -9615 beagle dog includes -9615 beagle dogs includes -9615 Canis canis synonym -9615 Canis domesticus synonym -9615 Canis familiaris Linnaeus, 1758 authority -9615 Canis familiaris synonym -9615 Canis lupus familiaris Linnaeus, 1758 authority -9615 Canis lupus familiaris scientific name -9615 dog genbank common name -9615 dogs common name -9685 cat common name -9685 cats common name -9685 domestic cat genbank common name -9685 Felis catus Linnaeus, 1758 authority -9685 Felis catus scientific name -9685 Felis domesticus synonym -9685 Felis silvestris catus synonym -9685 Korat cats includes -9685 Korat cats L. 
authority -9796 domestic horse common name -9796 equine common name -9796 Equus caballus Linnaeus, 1758 authority -9796 Equus caballus scientific name -9796 Equus przewalskii f. caballus synonym -9796 Equus przewalskii forma caballus synonym -9796 horse genbank common name -9823 pig genbank common name -9823 pigs common name -9823 Sus scrofa Linnaeus, 1758 authority -9823 Sus scrofa scientific name -9823 swine common name -9823 wild boar common name -9913 Bos bovis synonym -9913 Bos primigenius taurus synonym -9913 Bos taurus Linnaeus, 1758 authority -9913 Bos taurus scientific name -9913 Bovidae sp. Adi Nefas includes -9913 bovine common name -9913 cattle genbank common name -9913 cow common name -9913 dairy cow common name -9913 domestic cattle common name -9913 domestic cow common name -9913 ox common name -9925 African dwarf goat includes -9925 African dwarf goats includes -9925 Capra aegagrus hircus synonym -9925 Capra hircus Linnaeus, 1758 authority -9925 Capra hircus scientific name -9925 domestic goat common name -9925 goat genbank common name -9925 goats common name -9925 Naine d'Afrique de l'Ouest includes -9940 domestic sheep common name -9940 lambs common name -9940 Ovis ammon aries synonym -9940 Ovis aries Linnaeus, 1758 authority -9940 Ovis aries scientific name -9940 Ovis orientalis aries synonym -9940 Ovis ovis synonym -9940 sheep genbank common name -9940 wild sheep common name -9986 domestic rabbit common name -9986 European rabbit common name -9986 Japanese white rabbit common name -9986 Lepus cuniculus Linnaeus, 1758 authority -9986 Lepus cuniculus synonym -9986 New Zealand rabbit includes -9986 Oryctolagus cuniculus scientific name -9986 rabbit genbank common name -9986 rabbits common name -10029 Chinese hamster genbank common name -10029 Chinese hamsters common name -10029 CHO cell lines includes -10029 Cricetulus aureus equivalent name -10029 Cricetulus barabensis griseus synonym -10029 Cricetulus griseus Milne-Edwards, 1867 authority -10029 
Cricetulus griseus scientific name -10089 Mus caroli Bonhote, 1902 authority -10089 Mus caroli scientific name -10089 Mus formosanus Kuroda, 1925 authority -10089 Mus formosanus synonym -10089 ricefield mouse common name -10089 Ryukyu mouse genbank common name -10090 house mouse genbank common name -10090 LK3 transgenic mice includes -10090 mouse common name -10090 Mus musculus Linnaeus, 1758 authority -10090 Mus musculus scientific name -10090 Mus sp. 129SV includes -10090 nude mice includes -10090 transgenic mice includes -10091 Mus castaneus synonym -10091 Mus musculus castaneus scientific name -10091 Mus musculus castaneus Waterhouse, 1843 authority -10091 southeastern Asian house mouse genbank common name -10092 Mus domesticus synonym -10092 Mus musculus domesticus Schwarz & Scharz 1943 authority -10092 Mus musculus domesticus scientific name -10092 Mus musculus praetextus synonym -10092 Mus praetextus synonym -10092 western European house mouse genbank common name -10093 Coelomys parahi synonym -10093 Gairdner's shrew-mouse common name -10093 Gairdner's shrewmouse common name -10093 Mus pahari scientific name -10093 Mus pahari Thomas, 1916 authority -10093 shrew mouse genbank common name -10096 Algerian mouse common name -10096 Mus musculus spretus synonym -10096 Mus spretus Lataste, 1883 authority -10096 Mus spretus scientific name -10096 western wild mouse genbank common name -10116 brown rat common name -10116 Buffalo rat includes -10116 laboratory rat includes -10116 Mus norvegicus Berkenhout, 1769 authority -10116 Mus norvegicus synonym -10116 Norway rat genbank common name -10116 rat common name -10116 rats common name -10116 Rattus norvegicus scientific name -10116 Rattus PC12 clone IS includes -10116 Rattus sp. 
strain Wistar includes -10116 Sprague-Dawley rat includes -10116 Wistar rats includes -10116 zitter rats includes -13616 Didelphys domestica synonym -13616 Didelphys domestica Wagner, 1842 authority -13616 gray short-tailed opossum genbank common name -13616 Monodelphis domestica scientific name -29760 Vitis vinifera L., 1753 authority -29760 Vitis vinifera scientific name -29760 Vitis vinifera subsp. vinifera synonym -29760 wine grape genbank common name -36329 Plasmodium falciparum 3D7 scientific name -36329 Plasmodium falciparum (isolate 3D7) synonym -37682 Aegilops squarrosa subsp. squarrosa synonym -37682 Aegilops squarrosa synonym -37682 Aegilops tauschii Coss., 1849 authority -37682 Aegilops tauschii scientific name -37682 Patropyrum tauschii (Coss.) A.Love authority -37682 Patropyrum tauschii subsp. tauschii synonym -37682 Patropyrum tauschii synonym -37682 Triticum aegilops P.Beauv. ex Roem. & Schult. authority -37682 Triticum aegilops synonym -37682 Triticum tauschii (Coss.) Schmalh. authority -37682 Triticum tauschii synonym -39442 eastern European house mouse genbank common name -39442 Mus musculus hortulanus synonym -39442 Mus musculus musculus scientific name -39946 Indian rice common name -39946 Indica rice common name -39946 long-grained rice genbank common name -39946 Oryza sativa (indica cultivar-group) synonym -39946 Oryza sativa Indica Group scientific name -39946 Oryza sativa (indica group) synonym -39946 Oryza sativa subsp. indica Kato authority -39946 Oryza sativa subsp. indica synonym -39946 Oryza sp. Poi-6 includes -39947 Japanese rice genbank common name -39947 Japonica rice common name -39947 Oryza sativa (japonica cultivar-group) synonym -39947 Oryza sativa Japonica Group scientific name -39947 Oryza sativa subsp. japonica synonym -109376 Brassica oleracea subsp. oleracea synonym -109376 Brassica oleracea var. oleracea scientific name -112509 domesticated barley genbank common name -112509 Hordeum sativum Jess. 
authority -112509 Hordeum sativum synonym -112509 Hordeum vulgare subsp. vulgare scientific name -112509 Hordeum vulgare subsp. vulgare Spenn. authority -112509 Hordeum vulgare var. nudum Spenn. authority -112509 Hordeum vulgare var. nudum synonym -112509 Hordeum vulgare var. vulgare synonym -112509 two-rowed barley common name -200361 Aegilops tauschii subsp. strangulata (Eig) Tzvelev, 1973 authority -200361 Aegilops tauschii subsp. strangulata scientific name -511145 Escherichia coli MG1655 synonym -511145 Escherichia coli strain MG1655 equivalent name -511145 Escherichia coli str. K12 substr. MG1655 equivalent name -511145 Escherichia coli str. K-12 substr. MG1655 scientific name -511145 Escherichia coli str. MG1655 equivalent name -559292 Saccharomyces cerevisiae S288C scientific name -1736656 Oryza sativa (javanica cultivar-group) synonym -1736656 Oryza sativa tropical japonica cultivar-group synonym -1736656 Oryza sativa tropical japonica group synonym -1736656 Oryza sativa tropical japonica subgroup scientific name -1736656 Oryza sativa var. javanica Koern. authority -1736656 Oryza sativa var. 
javanica synonym -1736658 Oryza sativa Aromatic Japonica Group synonym -1736658 Oryza sativa aromatic subgroup scientific name -1736658 Oryza sativa Group V synonym -1736659 Oryza sativa aus cultivar-group synonym -1736659 Oryza sativa aus group synonym -1736659 Oryza sativa aus subgroup scientific name -1736659 Oryza sativa aus synonym -3711 3706 merged_taxon_id -37682 4482 merged_taxon_id -112509 4514 merged_taxon_id -4577 4578 merged_taxon_id -9940 9936 merged_taxon_id -9986 9985 merged_taxon_id -7955 27702 merged_taxon_id -9986 34833 merged_taxon_id -10116 36465 merged_taxon_id -9685 36475 merged_taxon_id -9544 36502 merged_taxon_id -3708 36503 merged_taxon_id -4513 36528 merged_taxon_id -7955 37966 merged_taxon_id -4565 39424 merged_taxon_id -37682 40669 merged_taxon_id -4932 41870 merged_taxon_id -9925 57076 merged_taxon_id -8128 61227 merged_taxon_id -37682 70688 merged_taxon_id -4571 77607 merged_taxon_id -10090 85055 merged_taxon_id -4113 90692 merged_taxon_id -10029 143285 merged_taxon_id -4081 195582 merged_taxon_id -10092 210727 merged_taxon_id -4565 235075 merged_taxon_id -9913 272461 merged_taxon_id -4530 389215 merged_taxon_id -562 469598 merged_taxon_id -562 662101 merged_taxon_id -562 662104 merged_taxon_id -562 1637691 merged_taxon_id -562 1806490 merged_taxon_id -7227 2267365 merged_taxon_id diff --git a/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_node.txt b/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_node.txt deleted file mode 100644 index 90b0b774..00000000 --- a/src/tests/databases/ensembl_genome_metadata/ncbi_taxa_node.txt +++ /dev/null @@ -1,64 +0,0 @@ -562 561 species 1 576069 582930 1 -3702 3701 species 1 2083678 2083679 1 -3708 3705 species 1 2087484 2087491 1 -3711 3705 species 1 2087500 2087531 1 -3712 3705 species 1 2087532 2087565 1 -3847 1462606 species 1 2065183 2065184 1 -3880 3877 species 1 2056937 2056944 1 -4081 49274 species 1 1940956 1940959 1 -4113 4107 species 1 1940913 1940916 1 -4513 4512 species 1 
1729674 1729689 1 -4530 4527 species 0 1724403 1724424 1 -4558 4557 species 1 1712790 1712793 1 -4565 4564 species 1 1730800 1730831 1 -4567 4571 subspecies 1 1730853 1730854 1 -4571 4564 species 1 1730852 1730867 1 -4577 4575 species 1 1712922 1712933 1 -4932 4930 species 1 2220338 2220971 1 -5833 418107 species 1 4998850 4999195 1 -6239 6237 species 1 2641603 2641604 1 -7227 32351 species 1 3734717 3734718 1 -7955 7954 species 1 4736348 4736349 1 -7994 7993 species 1 4744616 4744617 1 -8030 8028 species 1 4766921 4766922 1 -8090 8089 species 1 4802895 4802896 1 -8128 8139 species 1 4796928 4796935 1 -8364 8363 species 1 4865014 4865015 1 -9031 9030 species 1 4926358 4926369 1 -9413 9412 species 1 4936848 4936849 1 -9544 9539 species 1 4948106 4948113 1 -9597 9596 species 1 4948688 4948689 1 -9598 9596 species 1 4948690 4948701 1 -9606 9605 species 1 4948704 4948709 1 -9612 9611 species 1 4942982 4943023 1 -9615 9612 subspecies 1 4942983 4942984 1 -9685 9682 species 1 4942587 4942588 1 -9796 9789 species 1 4941821 4941822 1 -9823 9822 species 1 4945820 4945851 1 -9913 9903 species 1 4945434 4945435 1 -9925 9922 species 1 4945068 4945071 1 -9940 9935 species 1 4945118 4945123 1 -9986 9984 species 1 4949258 4949263 1 -10029 10028 species 1 4956857 4956858 1 -10089 862507 species 1 4953572 4953573 1 -10090 862507 species 1 4953574 4953607 1 -10091 10090 subspecies 1 4953575 4953576 1 -10092 10090 subspecies 1 4953577 4953578 1 -10093 862508 species 1 4953660 4953661 1 -10096 862507 species 1 4953608 4953609 1 -10116 10114 species 1 4953915 4953918 1 -13616 13615 species 1 4932488 4932489 1 -29760 3603 species 1 1989578 1989583 1 -36329 5833 isolate 1 4998881 4998882 1 -37682 4480 species 1 1730742 1730751 1 -39442 10090 subspecies 1 4953581 4953582 1 -39946 4530 no rank 1 1724404 1724409 1 -39947 4530 no rank 1 1724410 1724417 1 -109376 3712 varietas 1 2087543 2087544 1 -112509 4513 subspecies 1 1729677 1729686 1 -200361 37682 subspecies 1 1730749 1730750 1 -511145 
83333 no rank 1 576077 576078 1 -559292 4932 strain 1 2220367 2220368 1 -1736656 39947 no rank 1 1724411 1724412 1 -1736658 39947 no rank 1 1724415 1724416 1 -1736659 39946 no rank 1 1724405 1724406 1 diff --git a/src/tests/databases/ensembl_genome_metadata/organism.txt b/src/tests/databases/ensembl_genome_metadata/organism.txt deleted file mode 100644 index c0bdff55..00000000 --- a/src/tests/databases/ensembl_genome_metadata/organism.txt +++ /dev/null @@ -1,11 +0,0 @@ -1 511145 562 Escherichia coli K-12 K-12 substr. MG1655 Escherichia coli str. K-12 substr. MG1655 str. K12 SAMN02604091 E coli K 12 1e579f8d-3880-424e-9b4f-190eb69280d9 strain 0 -4 9606 9606 human Gambian in Western Division Homo sapiens SAMN17861670 Human 18bd7042-d861-4a10-b5d0-68c8bccfc87e population 0 -5 9606 9606 human Esan in Nigeria Homo sapiens SAMN17861241 Human a3352834-cea1-40aa-9dad-98581620c36b population 0 -6 9606 9606 human Gambian in Western Division Homo sapiens SAMN17861664 Human 87fb40f8-563b-4095-9fce-2bafa77ffba1 population 0 -7 9606 9606 human African from Barbados Homo sapiens SAMN13958415 Human 7f1653e1-9be5-4313-9fe9-800ae18d87b4 population 0 -9 9606 9606 human European Homo sapiens SAMN03283347 Human b0e689ba-889b-40af-8ab9-7675f9df79b6 population 0 -14 36329 5833 Malaria parasite \N Plasmodium falciparum 3D7 SAMN00102897 \N c867d142-85c4-4a5d-8361-b3f7f5fa3544 \N 0 -72 4565 4565 Bread wheat Chinese Spring Triticum aestivum SAMEA4791365 Wheat 86dd50f1-421e-4829-aca5-13ccc9a459f6 cultivar 0 -83 9606 9606 Human \N Homo sapiens SAMN12121739 Human 1d336185-affe-4a91-85bb-04ebd73cbb56 \N 999 -172 559292 4932 Baker's yeast S288C Saccharomyces cerevisiae S288c SAMEA3184125 Bakers yeast ae962453-0287-4201-83b8-3847c7d8027d strain 0 -175 6239 6239 Roundworm N2 Caenorhabditis elegans SAMN04256190 Roundworm b181947a-a725-4866-ada4-5433e5dfdcac strain 0 diff --git a/src/tests/databases/ensembl_genome_metadata/organism_group.txt 
b/src/tests/databases/ensembl_genome_metadata/organism_group.txt deleted file mode 100644 index 37527c46..00000000 --- a/src/tests/databases/ensembl_genome_metadata/organism_group.txt +++ /dev/null @@ -1,8 +0,0 @@ -1 Division EnsemblBacteria bacteria -2 Division EnsemblVertebrates vertebrates -3 Division EnsemblPlants plants -5 Division EnsemblProtists protists -9 Division EnsemblMetazoa metazoa -12 Division EnsemblFungi fungi -13 Internal Populars popular -14 Test EnsemblTest TestDivision diff --git a/src/tests/databases/ensembl_genome_metadata/organism_group_member.txt b/src/tests/databases/ensembl_genome_metadata/organism_group_member.txt deleted file mode 100644 index ddf7fa31..00000000 --- a/src/tests/databases/ensembl_genome_metadata/organism_group_member.txt +++ /dev/null @@ -1,17 +0,0 @@ -1 0 1 1 \N -4 0 4 2 \N -5 0 5 2 \N -6 0 6 2 \N -7 0 7 2 \N -9 0 9 2 \N -12 0 14 5 \N -67 0 72 3 \N -77 0 83 2 \N -159 0 172 12 \N -161 0 175 9 \N -193 0 83 13 1 -196 0 72 13 4 -215 0 172 13 23 -216 0 175 13 24 -233 0 1 13 41 -234 0 14 13 42 diff --git a/src/tests/databases/ensembl_genome_metadata/table.sql b/src/tests/databases/ensembl_genome_metadata/table.sql deleted file mode 100644 index a59fa934..00000000 --- a/src/tests/databases/ensembl_genome_metadata/table.sql +++ /dev/null @@ -1,306 +0,0 @@ -CREATE TABLE `assembly` -( - `assembly_id` int(11) NOT NULL AUTO_INCREMENT, - `ucsc_name` varchar(16) DEFAULT NULL, - `accession` varchar(16) NOT NULL, - `level` varchar(32) NOT NULL, - `name` varchar(128) NOT NULL, - `accession_body` varchar(32) DEFAULT NULL, - `assembly_default` varchar(128) DEFAULT NULL, - `tol_id` varchar(32) DEFAULT NULL, - `created` datetime(6) DEFAULT NULL, - `ensembl_name` varchar(255) DEFAULT NULL, - `assembly_uuid` char(36) NOT NULL, - `is_reference` tinyint(1) NOT NULL, - PRIMARY KEY (`assembly_id`), - UNIQUE KEY `accession` (`accession`), - UNIQUE KEY `assembly_uuid` (`assembly_uuid`), - UNIQUE KEY `ensembl_name` (`ensembl_name`) -) ENGINE=InnoDB 
AUTO_INCREMENT=220 DEFAULT CHARSET=latin1; - -CREATE TABLE `assembly_sequence` -( - `assembly_sequence_id` int(11) NOT NULL AUTO_INCREMENT, - `name` varchar(128) DEFAULT NULL, - `accession` varchar(128) NOT NULL, - `chromosomal` tinyint(1) NOT NULL DEFAULT '0', - `length` int(11) NOT NULL, - `sequence_location` varchar(10) DEFAULT NULL, - `md5` varchar(32) DEFAULT NULL, - `assembly_id` int(11) NOT NULL, - `chromosome_rank` int(11) DEFAULT NULL, - `sha512t24u` varchar(128) DEFAULT NULL, - `is_circular` tinyint(1) NOT NULL DEFAULT '0', - `type` varchar(26) NOT NULL, - `additional` tinyint(1) NOT NULL DEFAULT '0', - `source` varchar(120) DEFAULT NULL, - PRIMARY KEY (`assembly_sequence_id`), - UNIQUE KEY `assembly_sequence_assembly_id_accession_5f3e5119_uniq` (`assembly_id`,`accession`), - KEY `assembly_sequence_assembly_id_chromosomal_index` (`assembly_id`,`chromosomal`), - KEY `assembly_sequence_name_assembly_id_index` (`name`,`assembly_id`), - CONSTRAINT `assembly_sequence_assembly_id_2a84ddcb_fk_assembly_assembly_id` FOREIGN KEY (`assembly_id`) REFERENCES `assembly` (`assembly_id`) ON DELETE CASCADE -) ENGINE=InnoDB AUTO_INCREMENT=3785696 DEFAULT CHARSET=latin1; - -CREATE TABLE `attribute` -( - `attribute_id` int(11) NOT NULL AUTO_INCREMENT, - `name` varchar(128) NOT NULL, - `label` varchar(128) NOT NULL, - `description` varchar(255) DEFAULT NULL, - `type` enum('string','integer','bp','percent','float') NOT NULL, - `required` tinyint(1) NOT NULL DEFAULT '0', - PRIMARY KEY (`attribute_id`), - UNIQUE KEY `name` (`name`), - UNIQUE KEY `name_2` (`name`), - UNIQUE KEY `name_3` (`name`) -) ENGINE=InnoDB AUTO_INCREMENT=198 DEFAULT CHARSET=latin1; - -CREATE TABLE `dataset` -( - `dataset_id` int(11) NOT NULL AUTO_INCREMENT, - `dataset_uuid` char(36) NOT NULL, - `name` varchar(128) NOT NULL, - `version` varchar(128) DEFAULT NULL, - `created` datetime(6) NOT NULL, - `label` varchar(128) NOT NULL, - `dataset_source_id` int(11) NOT NULL, - `dataset_type_id` int(11) NOT NULL, - 
`status` enum('Submitted','Processing','Processed','Released','Faulty','Suppressed') NOT NULL DEFAULT 'Submitted', - `parent_id` int(11) DEFAULT NULL, - PRIMARY KEY (`dataset_id`), - KEY `dataset_dataset_source_id_fd96f115_fk_dataset_s` (`dataset_source_id`), - KEY `dataset_dataset_type_id_47284562_fk_dataset_type_dataset_type_id` (`dataset_type_id`), - KEY `dataset_parent_id_fk` (`parent_id`), - CONSTRAINT `dataset_dataset_source_id_fd96f115_fk_dataset_s` FOREIGN KEY (`dataset_source_id`) REFERENCES `dataset_source` (`dataset_source_id`) ON DELETE CASCADE, - CONSTRAINT `dataset_dataset_type_id_47284562_fk_dataset_type_dataset_type_id` FOREIGN KEY (`dataset_type_id`) REFERENCES `dataset_type` (`dataset_type_id`), - CONSTRAINT `dataset_parent_id_fk` FOREIGN KEY (`parent_id`) REFERENCES `dataset` (`dataset_id`) ON DELETE CASCADE -) ENGINE=InnoDB AUTO_INCREMENT=9075 DEFAULT CHARSET=latin1; - -CREATE TABLE `dataset_attribute` -( - `dataset_attribute_id` int(11) NOT NULL AUTO_INCREMENT, - `value` varchar(128) DEFAULT NULL, - `attribute_id` int(11) NOT NULL, - `dataset_id` int(11) NOT NULL, - PRIMARY KEY (`dataset_attribute_id`), - UNIQUE KEY `dataset_attribute_dataset_id_attribute_id_value_4d1ddfaf_uniq` (`dataset_id`,`attribute_id`,`value`), - KEY `dataset_attribute_attribute_id_55c51407_fk_attribute` (`attribute_id`), - KEY `dataset_attribute_dataset_id_2e2afe19` (`dataset_id`), - CONSTRAINT `dataset_attribute_attribute_id_55c51407_fk_attribute` FOREIGN KEY (`attribute_id`) REFERENCES `attribute` (`attribute_id`) ON DELETE CASCADE, - CONSTRAINT `dataset_attribute_dataset_id_2e2afe19_fk_dataset_dataset_id` FOREIGN KEY (`dataset_id`) REFERENCES `dataset` (`dataset_id`) ON DELETE CASCADE -) ENGINE=InnoDB AUTO_INCREMENT=211873 DEFAULT CHARSET=latin1; - -CREATE TABLE `dataset_source` -( - `dataset_source_id` int(11) NOT NULL AUTO_INCREMENT, - `type` varchar(32) NOT NULL, - `name` varchar(255) NOT NULL, - `location` varchar(120) DEFAULT NULL, - PRIMARY KEY 
(`dataset_source_id`), - UNIQUE KEY `name` (`name`) -) ENGINE=InnoDB AUTO_INCREMENT=4353 DEFAULT CHARSET=latin1; - -CREATE TABLE `dataset_type` -( - `dataset_type_id` int(11) NOT NULL AUTO_INCREMENT, - `name` varchar(32) NOT NULL, - `label` varchar(128) NOT NULL, - `topic` varchar(32) NOT NULL, - `description` varchar(255) DEFAULT NULL, - `parent_id` int(11) DEFAULT NULL, - PRIMARY KEY (`dataset_type_id`), - UNIQUE KEY `name` (`name`), - KEY `dataset_type_parent_id_fk` (`parent_id`), - CONSTRAINT `dataset_type_parent_id_fk` FOREIGN KEY (`parent_id`) REFERENCES `dataset_type` (`dataset_type_id`) ON DELETE SET NULL -) ENGINE=InnoDB AUTO_INCREMENT=39 DEFAULT CHARSET=latin1; - -CREATE TABLE `ensembl_release` -( - `release_id` int(11) NOT NULL AUTO_INCREMENT, - `version` decimal(10, 1) NOT NULL, - `release_date` date NOT NULL, - `label` varchar(64) NOT NULL, - `is_current` tinyint(1) NOT NULL DEFAULT '0', - `release_type` enum('integrated','partial') NOT NULL, - `site_id` int(11) NOT NULL, - `status` varchar(12) NOT NULL, - `name` varchar(3) DEFAULT NULL, - PRIMARY KEY (`release_id`), - UNIQUE KEY `ensembl_release_version_site_id_b743399a_uniq` (`version`,`site_id`), - KEY `ensembl_release_site_id_7c2f537a_fk_ensembl_site_site_id` (`site_id`), - CONSTRAINT `ensembl_release_site_id_7c2f537a_fk_ensembl_site_site_id` FOREIGN KEY (`site_id`) REFERENCES `ensembl_site` (`site_id`) -) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1; - -CREATE TABLE `ensembl_site` -( - `site_id` int(11) NOT NULL AUTO_INCREMENT, - `name` varchar(64) NOT NULL, - `label` varchar(64) NOT NULL, - `uri` varchar(64) NOT NULL, - PRIMARY KEY (`site_id`) -) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1; - -CREATE TABLE `genome` -( - `genome_id` int(11) NOT NULL AUTO_INCREMENT, - `genome_uuid` char(36) NOT NULL, - `created` datetime(6) NOT NULL, - `assembly_id` int(11) NOT NULL, - `organism_id` int(11) NOT NULL, - `production_name` varchar(120) NOT NULL, - `genebuild_version` varchar(64) NOT 
NULL, - `genebuild_date` varchar(20) NOT NULL, - `annotation_source` varchar(120) NOT NULL, - `suppressed` tinyint(1) NOT NULL DEFAULT '0', - `suppression_details` varchar(255) DEFAULT NULL, - `url_name` varchar(128) DEFAULT NULL, - PRIMARY KEY (`genome_id`), - UNIQUE KEY `genome_genome_uuid_6b62d0ad_uniq` (`genome_uuid`), - KEY `genome_assembly_id_0a748388_fk_assembly_assembly_id` (`assembly_id`), - KEY `genome_organism_id_99ad7f35_fk_organism_organism_id` (`organism_id`), - CONSTRAINT `genome_assembly_id_0a748388_fk_assembly_assembly_id` FOREIGN KEY (`assembly_id`) REFERENCES `assembly` (`assembly_id`) ON DELETE CASCADE, - CONSTRAINT `genome_organism_id_99ad7f35_fk_organism_organism_id` FOREIGN KEY (`organism_id`) REFERENCES `organism` (`organism_id`) ON DELETE CASCADE -) ENGINE=InnoDB AUTO_INCREMENT=205 DEFAULT CHARSET=latin1; - -CREATE TABLE `genome_dataset` -( - `genome_dataset_id` int(11) NOT NULL AUTO_INCREMENT, - `is_current` tinyint(1) NOT NULL, - `dataset_id` int(11) NOT NULL, - `genome_id` int(11) NOT NULL, - `release_id` int(11) DEFAULT NULL, - PRIMARY KEY (`genome_dataset_id`), - UNIQUE KEY `uk_genome_dataset` (`dataset_id`,`genome_id`), - KEY `genome_dataset_genome_id_21d55a50_fk_genome_genome_id` (`genome_id`), - KEY `genome_dataset_release_id_1903f87c_fk_ensembl_release_release_id` (`release_id`), - CONSTRAINT `genome_dataset_dataset_id_0e9b7c99_fk_dataset_dataset_id` FOREIGN KEY (`dataset_id`) REFERENCES `dataset` (`dataset_id`) ON DELETE CASCADE, - CONSTRAINT `genome_dataset_genome_id_21d55a50_fk_genome_genome_id` FOREIGN KEY (`genome_id`) REFERENCES `genome` (`genome_id`) ON DELETE CASCADE, - CONSTRAINT `genome_dataset_release_id_1903f87c_fk_ensembl_release_release_id` FOREIGN KEY (`release_id`) REFERENCES `ensembl_release` (`release_id`) ON DELETE SET NULL -) ENGINE=InnoDB AUTO_INCREMENT=9020 DEFAULT CHARSET=latin1; - -CREATE TABLE `genome_group` -( - `genome_group_id` int(11) NOT NULL AUTO_INCREMENT, - `type` 
enum('compara_reference','structural_variant','project') NOT NULL, - `name` varchar(128) NOT NULL, - `label` varchar(128) DEFAULT NULL, - `searchable` tinyint(1) NOT NULL DEFAULT '0', - `description` varchar(255) DEFAULT NULL, - PRIMARY KEY (`genome_group_id`), - UNIQUE KEY `unique_type_name` (`type`,`name`), - KEY `idx_type` (`type`), - KEY `idx_searchable` (`searchable`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT; - -CREATE TABLE `genome_group_member` -( - `genome_group_member_id` int(11) NOT NULL AUTO_INCREMENT, - `is_reference` tinyint(1) NOT NULL DEFAULT '0', - `genome_id` int(11) NOT NULL, - `genome_group_id` int(11) NOT NULL, - `release_id` int(11) DEFAULT NULL, - `is_current` tinyint(1) NOT NULL DEFAULT '0', - PRIMARY KEY (`genome_group_member_id`), - UNIQUE KEY `unique_genome_group` (`genome_id`,`genome_group_id`), - KEY `idx_genome_id` (`genome_id`), - KEY `idx_genome_group_id` (`genome_group_id`), - KEY `idx_release_id` (`release_id`), - KEY `idx_is_current` (`is_current`), - CONSTRAINT `fk_ggm_genome` FOREIGN KEY (`genome_id`) REFERENCES `genome` (`genome_id`) ON DELETE CASCADE, - CONSTRAINT `fk_ggm_group` FOREIGN KEY (`genome_group_id`) REFERENCES `genome_group` (`genome_group_id`) ON DELETE CASCADE, - CONSTRAINT `fk_ggm_release` FOREIGN KEY (`release_id`) REFERENCES `ensembl_release` (`release_id`) ON DELETE SET NULL -) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT; - -CREATE TABLE `genome_release` -( - `genome_release_id` int(11) NOT NULL AUTO_INCREMENT, - `is_current` tinyint(1) NOT NULL, - `genome_id` int(11) NOT NULL, - `release_id` int(11) NOT NULL, - PRIMARY KEY (`genome_release_id`), - UNIQUE KEY `uk_genome_dataset` (`release_id`,`genome_id`), - KEY `genome_release_genome_id_3e45dc04_fk_genome_genome_id` (`genome_id`), - CONSTRAINT `genome_release_genome_id_3e45dc04_fk_genome_genome_id` FOREIGN KEY (`genome_id`) REFERENCES `genome` (`genome_id`), - CONSTRAINT 
`genome_release_release_id_bca7e1e5_fk_ensembl_release_release_id` FOREIGN KEY (`release_id`) REFERENCES `ensembl_release` (`release_id`) -) ENGINE=InnoDB AUTO_INCREMENT=31 DEFAULT CHARSET=latin1; - -CREATE TABLE `ncbi_taxa_name` ( - `taxon_id` int(10) unsigned NOT NULL, - `name` varchar(500) NOT NULL, - `name_class` varchar(50) NOT NULL, - KEY `taxon_id` (`taxon_id`), - KEY `name` (`name`), - KEY `name_class` (`name_class`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; - -CREATE TABLE `ncbi_taxa_node` ( - `taxon_id` int(10) unsigned NOT NULL, - `parent_id` int(10) unsigned NOT NULL, - `rank` char(32) NOT NULL DEFAULT '', - `genbank_hidden_flag` tinyint(1) NOT NULL DEFAULT '0', - `left_index` int(10) NOT NULL DEFAULT '0', - `right_index` int(10) NOT NULL DEFAULT '0', - `root_id` int(10) NOT NULL DEFAULT '1', - PRIMARY KEY (`taxon_id`), - KEY `parent_id` (`parent_id`), - KEY `rank` (`rank`), - KEY `left_index` (`left_index`), - KEY `right_index` (`right_index`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; - -CREATE TABLE `organism` -( - `organism_id` int(11) NOT NULL AUTO_INCREMENT, - `taxonomy_id` int(11) NOT NULL, - `species_taxonomy_id` int(11) DEFAULT NULL, - `common_name` varchar(128) NOT NULL, - `strain` varchar(128) DEFAULT NULL, - `scientific_name` varchar(128) DEFAULT NULL, - `biosample_id` varchar(128) NOT NULL, - `scientific_parlance_name` varchar(255) DEFAULT NULL, - `organism_uuid` char(36) NOT NULL, - `strain_type` varchar(128) DEFAULT NULL, - `rank` int(11) DEFAULT '0', - PRIMARY KEY (`organism_id`), - UNIQUE KEY `ensembl_name` (`biosample_id`), - UNIQUE KEY `organism_uuid` (`organism_uuid`) -) ENGINE=InnoDB AUTO_INCREMENT=176 DEFAULT CHARSET=latin1; - -CREATE TABLE `organism_group` -( - `organism_group_id` int(11) NOT NULL AUTO_INCREMENT, - `type` varchar(32) DEFAULT NULL, - `name` varchar(255) NOT NULL, - `code` varchar(48) DEFAULT NULL, - PRIMARY KEY (`organism_group_id`), - UNIQUE KEY `code` (`code`), - UNIQUE KEY `organism_group_type_name_170b6dae_uniq` 
(`type`,`name`) -) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=latin1; - -CREATE TABLE `organism_group_member` -( - `organism_group_member_id` int(11) NOT NULL AUTO_INCREMENT, - `is_reference` tinyint(1) NOT NULL DEFAULT '0', - `organism_id` int(11) NOT NULL, - `organism_group_id` int(11) NOT NULL, - `order` int(11) DEFAULT NULL, - PRIMARY KEY (`organism_group_member_id`), - UNIQUE KEY `organism_group_member_organism_id_organism_gro_fe8f49ac_uniq` (`organism_id`,`organism_group_id`), - KEY `organism_group_membe_organism_group_id_533ca128_fk_organism_` (`organism_group_id`), - CONSTRAINT `organism_group_membe_organism_group_id_533ca128_fk_organism_` FOREIGN KEY (`organism_group_id`) REFERENCES `organism_group` (`organism_group_id`) ON DELETE CASCADE, - CONSTRAINT `organism_group_membe_organism_id_2808252e_fk_organism_` FOREIGN KEY (`organism_id`) REFERENCES `organism` (`organism_id`) ON DELETE CASCADE -) ENGINE=InnoDB AUTO_INCREMENT=235 DEFAULT CHARSET=latin1; - -CREATE TABLE `sequence_alias` -( - `sequence_alias_id` int(11) NOT NULL AUTO_INCREMENT, - `assembly_sequence_id` int(11) NOT NULL, - `alias` varchar(128) NOT NULL, - `source` varchar(128) DEFAULT NULL, - PRIMARY KEY (`sequence_alias_id`), - UNIQUE KEY `unique_sequence_alias` (`assembly_sequence_id`,`alias`), - KEY `idx_alias` (`alias`), - KEY `idx_assembly_sequence_id` (`assembly_sequence_id`), - CONSTRAINT `fk_sa_assembly_sequence` FOREIGN KEY (`assembly_sequence_id`) REFERENCES `assembly_sequence` (`assembly_sequence_id`) ON DELETE CASCADE -) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT; - diff --git a/src/tests/databases/load_mysql_db.py b/src/tests/databases/load_mysql_db.py deleted file mode 100644 index 7045c8d8..00000000 --- a/src/tests/databases/load_mysql_db.py +++ /dev/null @@ -1,227 +0,0 @@ -#!/usr/bin/env python3 -""" -Load a test database into MySQL from directory structure. -Reads table.sql schema file and tab-separated .txt data files. 
-""" - -import argparse -import csv -from pathlib import Path -from urllib.parse import urlparse - -import mysql.connector -from mysql.connector import Error - - -def parse_mysql_uri(uri): - """Parse MySQL URI and return connection parameters.""" - parsed = urlparse(uri) - - return { - "host": parsed.hostname, - "port": parsed.port or 3306, - "user": parsed.username, - "password": parsed.password, - "database": None, # We'll create databases ourselves - } - - -def create_database(cursor, db_name, drop_existing=False): - """Create database, optionally dropping it first if it exists.""" - try: - if drop_existing: - print(f"Dropping existing database '{db_name}' if it exists...") - cursor.execute(f"DROP DATABASE IF EXISTS `{db_name}`") - print(f"✓ Database dropped") - - cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{db_name}`") - print(f"✓ Database '{db_name}' ready") - return True - except Error as e: - print(f"✗ Error creating database: {e}") - return False - - -def load_schema(cursor, schema_file): - """Load SQL schema from file.""" - with open(schema_file, "r", encoding="utf-8") as f: - schema_sql = f.read() - - # Split into individual statements (handle multi-statement SQL) - statements = [s.strip() for s in schema_sql.split(";") if s.strip()] - - for statement in statements: - try: - cursor.execute(statement) - except Error as e: - print(f"✗ Error executing statement: {e}") - print(f" Statement: {statement[:100]}...") - raise - - print(f"✓ Schema loaded") - - -def get_table_columns(cursor, table_name): - """Get column names for a table.""" - cursor.execute(f"SHOW COLUMNS FROM `{table_name}`") - return [row[0] for row in cursor.fetchall()] - - -def load_table_data(cursor, table_name, txt_file): - """Load data from tab-separated file into table.""" - - # Get column information - columns = get_table_columns(cursor, table_name) - column_count = len(columns) - - # Prepare INSERT statement - placeholders = ",".join(["%s"] * column_count) - insert_sql = f"INSERT INTO 
`{table_name}` VALUES ({placeholders})" - - rows_inserted = 0 - with open(txt_file, "r", encoding="utf-8") as f: - reader = csv.reader(f, delimiter="\t") - - for row in reader: - # Handle MySQL NULL representation and clean data - cleaned_row = [] - for val in row: - if val == "\\N": - cleaned_row.append(None) - else: - # Strip trailing commas and whitespace - cleaned_row.append(val.rstrip(",").strip() if val else val) - - try: - cursor.execute(insert_sql, cleaned_row) - rows_inserted += 1 - except Error as e: - print(f"⚠ Warning: Error inserting row into {table_name}: {e}") - print(f" Row data: {cleaned_row}") - - return rows_inserted - - -def load_database(db_dir, mysql_uri, db_name=None, drop_existing=False): - """Load a database directory into MySQL.""" - db_path = Path(db_dir) - - if not db_path.exists(): - print(f"✗ Error: Directory {db_dir} does not exist") - return False - - if not db_path.is_dir(): - print(f"✗ Error: {db_dir} is not a directory") - return False - - # Use provided database name or default to test_ - if not db_name: - db_name = f"test_{db_path.name}" - - print(f"\nLoading database from: {db_path}") - print(f"Target database name: {db_name}\n") - - # Check for schema file - schema_file = db_path / "table.sql" - if not schema_file.exists(): - print(f"✗ Error: No table.sql found in {db_path}") - return False - - # Parse MySQL connection - try: - connection_params = parse_mysql_uri(mysql_uri) - except Exception as e: - print(f"✗ Error parsing MySQL URI: {e}") - print("Expected format: mysql://user:password@host:port/") - return False - - try: - # Connect to MySQL server - conn = mysql.connector.connect(**connection_params) - cursor = conn.cursor() - - # Create and use database - if not create_database(cursor, db_name, drop_existing): - return False - - cursor.execute(f"USE `{db_name}`") - - # Disable foreign key checks during data load - cursor.execute("SET FOREIGN_KEY_CHECKS=0") - print(f"✓ Foreign key checks disabled") - - # Load schema - 
load_schema(cursor, schema_file) - conn.commit() - - # Load data from all .txt files - txt_files = sorted(db_path.glob("*.txt")) - - if not txt_files: - print(f"⚠ No data files found") - - for txt_file in txt_files: - table_name = txt_file.stem - - # Check if table exists - cursor.execute("SHOW TABLES LIKE %s", (table_name,)) - if not cursor.fetchone(): - print(f"⚠ Table '{table_name}' not found in schema, skipping {txt_file.name}") - continue - - rows = load_table_data(cursor, table_name, txt_file) - conn.commit() - print(f"✓ Loaded {rows} rows into {table_name}") - - # Re-enable foreign key checks - cursor.execute("SET FOREIGN_KEY_CHECKS=1") - print(f"\n✓ Foreign key checks re-enabled") - - cursor.close() - conn.close() - - print(f"\n{'=' * 60}") - print(f"✓ Successfully loaded database: {db_name}") - return True - - except Error as e: - print(f"✗ MySQL Error: {e}") - return False - except Exception as e: - print(f"✗ Error: {e}") - return False - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Load a test database into MySQL from directory structure", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - %(prog)s core_1 mysql://root:password@localhost:3306/ - %(prog)s /path/to/core_1 mysql://user:pass@db.example.com:3306/ - %(prog)s core_1 mysql://root:password@localhost:3306/ --name my_test_db - %(prog)s core_1 mysql://root:password@localhost:3306/ --drop - -The script will create a database named 'test_' by default, -or use the name specified with --name. Use --drop to drop and recreate the -database if it already exists. 
- """, - ) - parser.add_argument("directory", help="Directory containing table.sql and .txt data files") - parser.add_argument("mysql_uri", help="MySQL connection URI (mysql://user:password@host:port/)") - parser.add_argument("-n", "--name", help="Database name (default: test_)", default=None) - parser.add_argument("-d", "--drop", action="store_true", help="Drop database if it exists before loading") - - args = parser.parse_args() - - # Check if mysql-connector-python is installed - try: - import mysql.connector - except ImportError: - print("✗ Error: mysql-connector-python is not installed") - print("Install it with: pip install mysql-connector-python") - exit(1) - - success = load_database(args.directory, args.mysql_uri, args.name, args.drop) - exit(0 if success else 1) diff --git a/src/tests/databases/mysql2sqlite.py b/src/tests/databases/mysql2sqlite.py deleted file mode 100644 index 2e96e499..00000000 --- a/src/tests/databases/mysql2sqlite.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python3 -""" -Convert MySQL database to SQLite using SQLAlchemy. -Uses reflection to automatically handle schema and data copying. -""" - -import argparse -from pathlib import Path - -from sqlalchemy import create_engine, MetaData, inspect, Integer, String, Text, Float, Boolean, text -from sqlalchemy.orm import sessionmaker -from sqlalchemy.pool import NullPool - - -def convert_mysql_types_to_sqlite(metadata): - """ - Convert MySQL-specific types to SQLite-compatible types. - Modifies the metadata in place. 
- """ - type_mapping = { - "TINYINT": Integer, - "SMALLINT": Integer, - "MEDIUMINT": Integer, - "INT": Integer, - "BIGINT": Integer, - "DECIMAL": Float, - "FLOAT": Float, - "DOUBLE": Float, - "VARCHAR": String, - "CHAR": String, - "TEXT": Text, - "MEDIUMTEXT": Text, - "LONGTEXT": Text, - "ENUM": String, - "SET": String, - } - - for table_name, table in metadata.tables.items(): - for column in table.columns: - type_name = type(column.type).__name__.upper() - - # Handle TINYINT(1) as Boolean - if type_name == "TINYINT": - # Check if it's TINYINT(1) which is typically used for boolean - if hasattr(column.type, "display_width") and column.type.display_width == 1: - column.type = Boolean() - else: - column.type = Integer() - elif type_name in type_mapping: - # Get length/precision if available - if hasattr(column.type, "length") and column.type.length: - column.type = type_mapping[type_name](length=column.type.length) - else: - column.type = type_mapping[type_name]() - - return metadata - - -def remove_indexes(metadata): - """ - Remove all indexes from metadata (except primary key constraints). - Useful for test databases where indexes aren't needed. - """ - for table_name, table in metadata.tables.items(): - # Create a list of indexes to remove (can't modify during iteration) - indexes_to_remove = [idx for idx in table.indexes] - - # Remove each index - for idx in indexes_to_remove: - table.indexes.remove(idx) - - return metadata - - -def convert_database(mysql_url, sqlite_path, batch_size=1000, keep_indexes=False): - """ - Convert a MySQL database to SQLite using SQLAlchemy reflection. 
- - Args: - mysql_url: MySQL connection URL (e.g., mysql://user:pass@host:port/dbname) - sqlite_path: Path to output SQLite database file - batch_size: Number of rows to copy per batch - keep_indexes: Whether to keep indexes (default: False, since not needed for tests) - """ - print(f"\nConverting database to: {sqlite_path}") - - # Create engines - mysql_engine = create_engine(mysql_url, poolclass=NullPool) - - # Remove existing SQLite file if it exists - sqlite_file = Path(sqlite_path) - if sqlite_file.exists(): - sqlite_file.unlink() - print(f"✓ Removed existing SQLite file") - - sqlite_engine = create_engine(f"sqlite:///{sqlite_path}") - - # Reflect MySQL schema - print("Reflecting MySQL schema...") - mysql_metadata = MetaData() - mysql_metadata.reflect(bind=mysql_engine) - - print(f"✓ Found {len(mysql_metadata.tables)} tables") - - # Convert MySQL types to SQLite-compatible types - print("Converting MySQL types to SQLite types...") - convert_mysql_types_to_sqlite(mysql_metadata) - print("✓ Types converted") - - # Remove indexes unless user wants to keep them - if not keep_indexes: - print("Removing indexes (not needed for unit tests)...") - remove_indexes(mysql_metadata) - print("✓ Indexes removed") - - # Create SQLite schema - print("Creating SQLite schema...") - mysql_metadata.create_all(sqlite_engine) - print("✓ Schema created") - - # Get inspector to check for foreign keys - inspector = inspect(mysql_engine) - - # Disable foreign key checks in SQLite during data load - with sqlite_engine.begin() as conn: - conn.execute(text("PRAGMA foreign_keys = OFF")) - - # Copy data table by table - print("\nCopying data...") - - # Create sessions - MySQLSession = sessionmaker(bind=mysql_engine) - SQLiteSession = sessionmaker(bind=sqlite_engine) - - mysql_session = MySQLSession() - sqlite_session = SQLiteSession() - - try: - for table_name in mysql_metadata.tables: - table = mysql_metadata.tables[table_name] - print(f" Copying {table_name}...", end=" ", flush=True) - - # 
Count rows in MySQL - count = mysql_session.execute(table.select()).rowcount - if count == -1: # Some drivers don't support rowcount on select - # Get actual count - result = mysql_session.execute(table.select()) - rows = result.fetchall() - count = len(rows) - - # Insert in batches - total_inserted = 0 - for i in range(0, count, batch_size): - batch = rows[i: i + batch_size] - if batch: - sqlite_session.execute(table.insert(), [dict(row._mapping) for row in batch]) - total_inserted += len(batch) - - sqlite_session.commit() - print(f"✓ {total_inserted} rows") - else: - # Stream and batch insert - result = mysql_session.execute(table.select()) - total_inserted = 0 - - while True: - batch = result.fetchmany(batch_size) - if not batch: - break - - sqlite_session.execute(table.insert(), [dict(row._mapping) for row in batch]) - total_inserted += len(batch) - - sqlite_session.commit() - print(f"✓ {total_inserted} rows") - - # Re-enable foreign keys - with sqlite_engine.begin() as conn: - conn.execute(text("PRAGMA foreign_keys = ON")) - - print(f"\n{'=' * 60}") - print(f"✓ Successfully converted to {sqlite_path}") - return True - - except Exception as e: - print(f"\n✗ Error during conversion: {e}") - import traceback - - traceback.print_exc() - sqlite_session.rollback() - return False - finally: - mysql_session.close() - sqlite_session.close() - mysql_engine.dispose() - sqlite_engine.dispose() - - -def main(): - parser = argparse.ArgumentParser( - description="Convert MySQL database to SQLite using SQLAlchemy", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - %(prog)s mysql://user:pass@host:port/test_core_1 core_1.db - %(prog)s mysql://user:pass@host:port/test_compara_db compara_db.db - %(prog)s mysql://user:pass@host/test_core_1 ./sqlite_dbs/core_1.db --batch-size 5000 - -The script uses SQLAlchemy to reflect the MySQL schema and copy all data to SQLite. -This preserves table structures, indexes, and relationships automatically. 
- """, - ) - parser.add_argument("mysql_url", help="MySQL connection URL (mysql://user:password@host:port/database)") - parser.add_argument("sqlite_path", help="Output SQLite database file path") - parser.add_argument( - "-b", "--batch-size", type=int, default=1000, help="Number of rows to copy per batch (default: 1000)" - ) - parser.add_argument( - "-k", - "--keep-indexes", - action="store_true", - help="Keep indexes in SQLite (default: False, indexes removed for faster tests)", - ) - - args = parser.parse_args() - - # Check if SQLAlchemy is installed - try: - import sqlalchemy - except ImportError: - print("✗ Error: SQLAlchemy is not installed") - print("Install it with: pip install sqlalchemy") - exit(1) - - # Create output directory if needed - output_path = Path(args.sqlite_path) - output_path.parent.mkdir(parents=True, exist_ok=True) - - success = convert_database(args.mysql_url, args.sqlite_path, args.batch_size, args.keep_indexes) - exit(0 if success else 1) - - -if __name__ == "__main__": - main() diff --git a/src/tests/databases/ncbi_taxonomy/ncbi_taxa_name.txt b/src/tests/databases/ncbi_taxonomy/ncbi_taxa_name.txt deleted file mode 100644 index a8f5dd28..00000000 --- a/src/tests/databases/ncbi_taxonomy/ncbi_taxa_name.txt +++ /dev/null @@ -1,472 +0,0 @@ -562 Achromobacter sp. ATCC 35328 includes -562 ATCC 11775 type material -562 Bacillus coli Migula 1895 authority -562 Bacillus coli synonym -562 bacterium 10a includes -562 Bacterium coli commune Escherich 1885 authority -562 Bacterium coli commune synonym -562 Bacterium coli (Migula 1895) Lehmann and Neumann 1896 authority -562 Bacterium coli synonym -562 bacterium E3 includes -562 CCUG 24 type material -562 CCUG 29300 type material -562 CIP 54.8 type material -562 DSM 30083 type material -562 E. 
coli common name -562 Enterococcus coli synonym -562 Escherichia coli (Migula 1895) Castellani and Chalmers 1919 authority -562 Escherichia coli scientific name -562 Escherichia/Shigella coli equivalent name -562 Escherichia sp. 3_2_53FAA includes -562 Escherichia sp. MAR includes -562 IAM 12119 type material -562 JCM 1649 type material -562 LMG 2092 type material -562 LMG:2092 type material -562 NBRC 102203 type material -562 NCCB 54008 type material -562 NCTC 9001 type material -562 strain U5/41 type material -3702 Arabidopsis thaliana (L.) Heynh., 1842 authority -3702 Arabidopsis thaliana scientific name -3702 Arabis thaliana L., 1753 authority -3702 Arabis thaliana synonym -3702 mouse-ear cress common name -3702 thale-cress common name -3702 thale cress genbank common name -3708 Brassica napus L., 1753 authority -3708 Brassica napus scientific name -3708 oilseed rape common name -3708 rape genbank common name -3708 rapeseeds common name -3711 Brassica rapa L., 1753 authority -3711 Brassica rapa scientific name -3711 field mustard genbank common name -3712 Brassica oleracea L., 1753 authority -3712 Brassica oleracea scientific name -3712 wild cabbage genbank common name -3847 Glycine max (L.) Merr., 1917 authority -3847 Glycine max scientific name -3847 Phaseolus max L., 1753 authority -3847 Phaseolus max synonym -3847 soybean genbank common name -3847 soybeans common name -3880 barrel medic genbank common name -3880 Medicago truncatula Gaertn., 1790 authority -3880 Medicago truncatula scientific name -4081 Lycopersicon esculentum Mill. authority -4081 Lycopersicon esculentum synonym -4081 Lycopersicon esculentum var. esculentum synonym -4081 Solanum esculentum Dunal authority -4081 Solanum esculentum synonym -4081 Solanum lycopersicum L., 1753 authority -4081 Solanum lycopersicum scientific name -4081 Solanum lycopersicum var. 
humboldtii synonym -4081 tomato genbank common name -4113 potatoes common name -4113 potato genbank common name -4113 Solanum tuberosum L., 1753 authority -4113 Solanum tuberosum scientific name -4113 Solanum tuberosum subsp. tuberosum includes -4513 barley common name -4513 Hordeum vulgare L., 1753 authority -4513 Hordeum vulgare scientific name -4530 Asian cultivated rice genbank common name -4530 Oryza sativa L., 1753 authority -4530 Oryza sativa scientific name -4530 red rice common name -4530 rice common name -4558 Andropogon sorghum (L.) Brot. authority -4558 Andropogon sorghum synonym -4558 broomcorn common name -4558 milo common name -4558 Sorghum bicolor (L.) Moench, 1794 authority -4558 Sorghum bicolor scientific name -4558 Sorghum bicolor subsp. bicolor synonym -4558 sorghum genbank common name -4558 Sorghum nervosum Besser ex Schult. authority -4558 Sorghum nervosum synonym -4558 Sorghum saccharatum (L.) Moench authority -4558 Sorghum saccharatum synonym -4558 Sorghum vulgare Pers. authority -4558 Sorghum vulgare synonym -4565 bread wheat genbank common name -4565 Canadian hard winter wheat common name -4565 common wheat common name -4565 Triticum aestivum L., 1753 authority -4565 Triticum aestivum scientific name -4565 Triticum aestivum subsp. aestivum synonym -4565 Triticum vulgare synonym -4565 Triticum vulgare Vill., 1787 authority -4565 wheat common name -4567 durum wheat genbank common name -4567 Triticum durum Desf. authority -4567 Triticum durum ssp. durum synonym -4567 Triticum durum subsp. durum synonym -4567 Triticum durum synonym -4567 Triticum rigidum conv. durum synonym -4567 Triticum rigidum ssp. durum synonym -4567 Triticum rigidum var. durum synonym -4567 Triticum turgidum Durum Group synonym -4567 Triticum turgidum subsp. durum (Desf.) Husn., 1899 authority -4567 Triticum turgidum subsp. 
durum scientific name -4571 cone wheat common name -4571 English wheat common name -4571 poulard wheat common name -4571 rivet wheat common name -4571 Triticum aethiopicum Jakubz., 1947 authority -4571 Triticum aethiopicum synonym -4571 Triticum durum subsp. abyssinicum synonym -4571 Triticum durum subsp. abyssinicum Vavilov, 1931 authority -4571 Triticum turgidum L., 1753 authority -4571 Triticum turgidum scientific name -4577 maize common name -4577 Zea mays L., 1753 authority -4577 Zea mays scientific name -4577 Zea mays var. japonica synonym -4932 ATCC 18824 type material -4932 baker's yeast common name -4932 brewer's yeast genbank common name -4932 Candida robusta synonym -4932 CBS 1171 type material -4932 Mycoderma cerevisiae Desm., 1827 authority -4932 Mycoderma cerevisiae synonym -4932 NRRL Y-12632 type material -4932 Saccharomyces capensis synonym -4932 Saccharomyces cerevisiae (Desm.) Meyen, 1838 authority -4932 Saccharomyces cerevisiae scientific name -4932 Saccharomyces cerevisiae 'var. diastaticus' equivalent name -4932 Saccharomyces diastaticus J. Andrews & R.B. Gilliland ex Van der Walt, 1965 authority -4932 Saccharomyces diastaticus synonym -4932 Saccharomyces italicus synonym -4932 Saccharomyces oviformis synonym -4932 Saccharomyces uvarum var. melibiosus synonym -4932 specimen-voucher:NRRL:Y:12632 type material -5833 malaria parasite P. falciparum genbank common name -5833 Plasmodium falciparum scientific name -5833 Plasmodium (Laverania) falciparum synonym -6239 Caenorhabditis elegans (Maupas, 1900) authority -6239 Caenorhabditis elegans scientific name -6239 Rhabditis elegans Maupas, 1900 authority -6239 Rhabditis elegans synonym -7227 Diptera sp. 
DNAS-2A9-224646 includes -7227 Drosophila melanogaster Meigen, 1830 authority -7227 Drosophila melanogaster scientific name -7227 fruit fly genbank common name -7227 Sophophora melanogaster (Meigen, 1830) authority -7227 Sophophora melanogaster synonym -7955 Brachydanio rerio frankei synonym -7955 Brachydanio rerio synonym -7955 Cyprinus rerio Hamilton, 1822 authority -7955 Cyprinus rerio synonym -7955 Danio frankei synonym -7955 Danio rerio frankei synonym -7955 Danio rerio (Hamilton, 1822) authority -7955 Danio rerio scientific name -7955 leopard danio common name -7955 zebra danio common name -7955 zebrafish genbank common name -7955 zebra fish common name -7994 Astyanax mexicanus (De Filippi, 1853) authority -7994 Astyanax mexicanus scientific name -7994 blind cave fish common name -7994 Mexican tetra genbank common name -7994 Tetragonopterus mexicanus De Filippi, 1853 authority -7994 Tetragonopterus mexicanus synonym -8030 Atlantic salmon genbank common name -8030 Salmo salar Linnaeus, 1758 authority -8030 Salmo salar scientific name -8090 Japanese medaka genbank common name -8090 Japanese rice fish common name -8090 medaka common name -8090 Oryzias latipes scientific name -8090 Oryzias latipes (Temminck & Schlegel, 1846) authority -8090 Poecilia latipes synonym -8090 Poecilia latipes Temminck & Schlegel, 1846 authority -8128 Nile tilapia genbank common name -8128 Oreochromis nilotica synonym -8128 Oreochromis niloticus (Linnaeus, 1758) authority -8128 Oreochromis niloticus scientific name -8128 Perca nilotica Linnaeus, 1758 authority -8128 Perca nilotica synonym -8128 Tilapia nilotica synonym -8364 Silurana tropicalis Gray, 1864 authority -8364 Silurana tropicalis synonym -8364 tropical clawed frog genbank common name -8364 western clawed frog common name -8364 Xenopus laevis tropicalis synonym -8364 Xenopus (Silurana) tropicalis synonym -8364 Xenopus tropicalis (Gray, 1864) authority -8364 Xenopus tropicalis scientific name -9031 bantam common name -9031 
chicken genbank common name -9031 chickens common name -9031 dwarf Leghorn chickens includes -9031 Gallus domesticus equivalent name -9031 Gallus gallus domesticus synonym -9031 Gallus gallus scientific name -9031 Phasianus gallus Linnaeus, 1758 authority -9031 Phasianus gallus synonym -9031 red junglefowl includes -9413 greater false vampire bat common name -9413 Indian false vampire genbank common name -9413 Megaderma lyra Saint-Hilaire, 1810 authority -9413 Megaderma lyra scientific name -9544 Cercopithecus mulatta synonym -9544 Cercopithecus mulatta Zimmermann, 1780 authority -9544 Macaca mulatta scientific name -9544 Macaca mulatta (Zimmermann, 1780) authority -9544 rhesus macaque common name -9544 rhesus macaques common name -9544 Rhesus monkey genbank common name -9544 rhesus monkeys common name -9597 bonobo common name -9597 Pan paniscus Schwarz, 1929 authority -9597 Pan paniscus scientific name -9597 pygmy chimpanzee genbank common name -9598 chimpanzee genbank common name -9598 Pan troglodytes scientific name -9598 Simia troglodytes Linnaeus, 1758 authority -9598 Simia troglodytes synonym -9606 Homo sapiens Linnaeus, 1758 authority -9606 Homo sapiens scientific name -9606 human genbank common name -9612 Canis lupus Linnaeus, 1758 authority -9612 Canis lupus scientific name -9612 gray wolf genbank common name -9612 grey wolf common name -9615 beagle dog includes -9615 beagle dogs includes -9615 Canis canis synonym -9615 Canis domesticus synonym -9615 Canis familiaris Linnaeus, 1758 authority -9615 Canis familiaris synonym -9615 Canis lupus familiaris Linnaeus, 1758 authority -9615 Canis lupus familiaris scientific name -9615 dog genbank common name -9615 dogs common name -9685 cat common name -9685 cats common name -9685 domestic cat genbank common name -9685 Felis catus Linnaeus, 1758 authority -9685 Felis catus scientific name -9685 Felis domesticus synonym -9685 Felis silvestris catus synonym -9685 Korat cats includes -9685 Korat cats L. 
authority -9796 domestic horse common name -9796 equine common name -9796 Equus caballus Linnaeus, 1758 authority -9796 Equus caballus scientific name -9796 Equus przewalskii f. caballus synonym -9796 Equus przewalskii forma caballus synonym -9796 horse genbank common name -9823 pig genbank common name -9823 pigs common name -9823 Sus scrofa Linnaeus, 1758 authority -9823 Sus scrofa scientific name -9823 swine common name -9823 wild boar common name -9913 Bos bovis synonym -9913 Bos primigenius taurus synonym -9913 Bos taurus Linnaeus, 1758 authority -9913 Bos taurus scientific name -9913 Bovidae sp. Adi Nefas includes -9913 bovine common name -9913 cattle genbank common name -9913 cow common name -9913 dairy cow common name -9913 domestic cattle common name -9913 domestic cow common name -9913 ox common name -9925 African dwarf goat includes -9925 African dwarf goats includes -9925 Capra aegagrus hircus synonym -9925 Capra hircus Linnaeus, 1758 authority -9925 Capra hircus scientific name -9925 domestic goat common name -9925 goat genbank common name -9925 goats common name -9925 Naine d'Afrique de l'Ouest includes -9940 domestic sheep common name -9940 lambs common name -9940 Ovis ammon aries synonym -9940 Ovis aries Linnaeus, 1758 authority -9940 Ovis aries scientific name -9940 Ovis orientalis aries synonym -9940 Ovis ovis synonym -9940 sheep genbank common name -9940 wild sheep common name -9986 domestic rabbit common name -9986 European rabbit common name -9986 Japanese white rabbit common name -9986 Lepus cuniculus Linnaeus, 1758 authority -9986 Lepus cuniculus synonym -9986 New Zealand rabbit includes -9986 Oryctolagus cuniculus scientific name -9986 rabbit genbank common name -9986 rabbits common name -10029 Chinese hamster genbank common name -10029 Chinese hamsters common name -10029 CHO cell lines includes -10029 Cricetulus aureus equivalent name -10029 Cricetulus barabensis griseus synonym -10029 Cricetulus griseus Milne-Edwards, 1867 authority -10029 
Cricetulus griseus scientific name -10089 Mus caroli Bonhote, 1902 authority -10089 Mus caroli scientific name -10089 Mus formosanus Kuroda, 1925 authority -10089 Mus formosanus synonym -10089 ricefield mouse common name -10089 Ryukyu mouse genbank common name -10090 house mouse genbank common name -10090 LK3 transgenic mice includes -10090 mouse common name -10090 Mus musculus Linnaeus, 1758 authority -10090 Mus musculus scientific name -10090 Mus sp. 129SV includes -10090 nude mice includes -10090 transgenic mice includes -10091 Mus castaneus synonym -10091 Mus musculus castaneus scientific name -10091 Mus musculus castaneus Waterhouse, 1843 authority -10091 southeastern Asian house mouse genbank common name -10092 Mus domesticus synonym -10092 Mus musculus domesticus Schwarz & Scharz 1943 authority -10092 Mus musculus domesticus scientific name -10092 Mus musculus praetextus synonym -10092 Mus praetextus synonym -10092 western European house mouse genbank common name -10093 Coelomys parahi synonym -10093 Gairdner's shrew-mouse common name -10093 Gairdner's shrewmouse common name -10093 Mus pahari scientific name -10093 Mus pahari Thomas, 1916 authority -10093 shrew mouse genbank common name -10096 Algerian mouse common name -10096 Mus musculus spretus synonym -10096 Mus spretus Lataste, 1883 authority -10096 Mus spretus scientific name -10096 western wild mouse genbank common name -10116 brown rat common name -10116 Buffalo rat includes -10116 laboratory rat includes -10116 Mus norvegicus Berkenhout, 1769 authority -10116 Mus norvegicus synonym -10116 Norway rat genbank common name -10116 rat common name -10116 rats common name -10116 Rattus norvegicus scientific name -10116 Rattus PC12 clone IS includes -10116 Rattus sp. 
strain Wistar includes -10116 Sprague-Dawley rat includes -10116 Wistar rats includes -10116 zitter rats includes -13616 Didelphys domestica synonym -13616 Didelphys domestica Wagner, 1842 authority -13616 gray short-tailed opossum genbank common name -13616 Monodelphis domestica scientific name -29760 Vitis vinifera L., 1753 authority -29760 Vitis vinifera scientific name -29760 Vitis vinifera subsp. vinifera synonym -29760 wine grape genbank common name -36329 Plasmodium falciparum 3D7 scientific name -36329 Plasmodium falciparum (isolate 3D7) synonym -37682 Aegilops squarrosa subsp. squarrosa synonym -37682 Aegilops squarrosa synonym -37682 Aegilops tauschii Coss., 1849 authority -37682 Aegilops tauschii scientific name -37682 Patropyrum tauschii (Coss.) A.Love authority -37682 Patropyrum tauschii subsp. tauschii synonym -37682 Patropyrum tauschii synonym -37682 Triticum aegilops P.Beauv. ex Roem. & Schult. authority -37682 Triticum aegilops synonym -37682 Triticum tauschii (Coss.) Schmalh. authority -37682 Triticum tauschii synonym -39442 eastern European house mouse genbank common name -39442 Mus musculus hortulanus synonym -39442 Mus musculus musculus scientific name -39946 Indian rice common name -39946 Indica rice common name -39946 long-grained rice genbank common name -39946 Oryza sativa (indica cultivar-group) synonym -39946 Oryza sativa Indica Group scientific name -39946 Oryza sativa (indica group) synonym -39946 Oryza sativa subsp. indica Kato authority -39946 Oryza sativa subsp. indica synonym -39946 Oryza sp. Poi-6 includes -39947 Japanese rice genbank common name -39947 Japonica rice common name -39947 Oryza sativa (japonica cultivar-group) synonym -39947 Oryza sativa Japonica Group scientific name -39947 Oryza sativa subsp. japonica synonym -109376 Brassica oleracea subsp. oleracea synonym -109376 Brassica oleracea var. oleracea scientific name -112509 domesticated barley genbank common name -112509 Hordeum sativum Jess. 
authority -112509 Hordeum sativum synonym -112509 Hordeum vulgare subsp. vulgare scientific name -112509 Hordeum vulgare subsp. vulgare Spenn. authority -112509 Hordeum vulgare var. nudum Spenn. authority -112509 Hordeum vulgare var. nudum synonym -112509 Hordeum vulgare var. vulgare synonym -112509 two-rowed barley common name -200361 Aegilops tauschii subsp. strangulata (Eig) Tzvelev, 1973 authority -200361 Aegilops tauschii subsp. strangulata scientific name -511145 Escherichia coli MG1655 synonym -511145 Escherichia coli strain MG1655 equivalent name -511145 Escherichia coli str. K12 substr. MG1655 equivalent name -511145 Escherichia coli str. K-12 substr. MG1655 scientific name -511145 Escherichia coli str. MG1655 equivalent name -559292 Saccharomyces cerevisiae S288C scientific name -1736656 Oryza sativa (javanica cultivar-group) synonym -1736656 Oryza sativa tropical japonica cultivar-group synonym -1736656 Oryza sativa tropical japonica group synonym -1736656 Oryza sativa tropical japonica subgroup scientific name -1736656 Oryza sativa var. javanica Koern. authority -1736656 Oryza sativa var. 
javanica synonym -1736658 Oryza sativa Aromatic Japonica Group synonym -1736658 Oryza sativa aromatic subgroup scientific name -1736658 Oryza sativa Group V synonym -1736659 Oryza sativa aus cultivar-group synonym -1736659 Oryza sativa aus group synonym -1736659 Oryza sativa aus subgroup scientific name -1736659 Oryza sativa aus synonym -3711 3706 merged_taxon_id -37682 4482 merged_taxon_id -112509 4514 merged_taxon_id -4577 4578 merged_taxon_id -9940 9936 merged_taxon_id -9986 9985 merged_taxon_id -7955 27702 merged_taxon_id -9986 34833 merged_taxon_id -10116 36465 merged_taxon_id -9685 36475 merged_taxon_id -9544 36502 merged_taxon_id -3708 36503 merged_taxon_id -4513 36528 merged_taxon_id -7955 37966 merged_taxon_id -4565 39424 merged_taxon_id -37682 40669 merged_taxon_id -4932 41870 merged_taxon_id -9925 57076 merged_taxon_id -8128 61227 merged_taxon_id -37682 70688 merged_taxon_id -4571 77607 merged_taxon_id -10090 85055 merged_taxon_id -4113 90692 merged_taxon_id -10029 143285 merged_taxon_id -4081 195582 merged_taxon_id -10092 210727 merged_taxon_id -4565 235075 merged_taxon_id -9913 272461 merged_taxon_id -4530 389215 merged_taxon_id -562 469598 merged_taxon_id -562 662101 merged_taxon_id -562 662104 merged_taxon_id -562 1637691 merged_taxon_id -562 1806490 merged_taxon_id -7227 2267365 merged_taxon_id -666668 carol_jabberwocky scientific name -6666666 Jabberwocky scientific name -666668 carol_jabberwocky scientific name diff --git a/src/tests/databases/ncbi_taxonomy/ncbi_taxa_node.txt b/src/tests/databases/ncbi_taxonomy/ncbi_taxa_node.txt deleted file mode 100644 index 90b0b774..00000000 --- a/src/tests/databases/ncbi_taxonomy/ncbi_taxa_node.txt +++ /dev/null @@ -1,64 +0,0 @@ -562 561 species 1 576069 582930 1 -3702 3701 species 1 2083678 2083679 1 -3708 3705 species 1 2087484 2087491 1 -3711 3705 species 1 2087500 2087531 1 -3712 3705 species 1 2087532 2087565 1 -3847 1462606 species 1 2065183 2065184 1 -3880 3877 species 1 2056937 2056944 1 -4081 49274 
species 1 1940956 1940959 1 -4113 4107 species 1 1940913 1940916 1 -4513 4512 species 1 1729674 1729689 1 -4530 4527 species 0 1724403 1724424 1 -4558 4557 species 1 1712790 1712793 1 -4565 4564 species 1 1730800 1730831 1 -4567 4571 subspecies 1 1730853 1730854 1 -4571 4564 species 1 1730852 1730867 1 -4577 4575 species 1 1712922 1712933 1 -4932 4930 species 1 2220338 2220971 1 -5833 418107 species 1 4998850 4999195 1 -6239 6237 species 1 2641603 2641604 1 -7227 32351 species 1 3734717 3734718 1 -7955 7954 species 1 4736348 4736349 1 -7994 7993 species 1 4744616 4744617 1 -8030 8028 species 1 4766921 4766922 1 -8090 8089 species 1 4802895 4802896 1 -8128 8139 species 1 4796928 4796935 1 -8364 8363 species 1 4865014 4865015 1 -9031 9030 species 1 4926358 4926369 1 -9413 9412 species 1 4936848 4936849 1 -9544 9539 species 1 4948106 4948113 1 -9597 9596 species 1 4948688 4948689 1 -9598 9596 species 1 4948690 4948701 1 -9606 9605 species 1 4948704 4948709 1 -9612 9611 species 1 4942982 4943023 1 -9615 9612 subspecies 1 4942983 4942984 1 -9685 9682 species 1 4942587 4942588 1 -9796 9789 species 1 4941821 4941822 1 -9823 9822 species 1 4945820 4945851 1 -9913 9903 species 1 4945434 4945435 1 -9925 9922 species 1 4945068 4945071 1 -9940 9935 species 1 4945118 4945123 1 -9986 9984 species 1 4949258 4949263 1 -10029 10028 species 1 4956857 4956858 1 -10089 862507 species 1 4953572 4953573 1 -10090 862507 species 1 4953574 4953607 1 -10091 10090 subspecies 1 4953575 4953576 1 -10092 10090 subspecies 1 4953577 4953578 1 -10093 862508 species 1 4953660 4953661 1 -10096 862507 species 1 4953608 4953609 1 -10116 10114 species 1 4953915 4953918 1 -13616 13615 species 1 4932488 4932489 1 -29760 3603 species 1 1989578 1989583 1 -36329 5833 isolate 1 4998881 4998882 1 -37682 4480 species 1 1730742 1730751 1 -39442 10090 subspecies 1 4953581 4953582 1 -39946 4530 no rank 1 1724404 1724409 1 -39947 4530 no rank 1 1724410 1724417 1 -109376 3712 varietas 1 2087543 2087544 1 -112509 
4513 subspecies 1 1729677 1729686 1 -200361 37682 subspecies 1 1730749 1730750 1 -511145 83333 no rank 1 576077 576078 1 -559292 4932 strain 1 2220367 2220368 1 -1736656 39947 no rank 1 1724411 1724412 1 -1736658 39947 no rank 1 1724415 1724416 1 -1736659 39946 no rank 1 1724405 1724406 1 diff --git a/src/tests/databases/ncbi_taxonomy/table.sql b/src/tests/databases/ncbi_taxonomy/table.sql deleted file mode 100644 index 016cdd55..00000000 --- a/src/tests/databases/ncbi_taxonomy/table.sql +++ /dev/null @@ -1,24 +0,0 @@ -CREATE TABLE `ncbi_taxa_name` ( - `taxon_id` int(10) unsigned NOT NULL, - `name` varchar(500) NOT NULL, - `name_class` varchar(50) NOT NULL, - KEY `taxon_id` (`taxon_id`), - KEY `name` (`name`), - KEY `name_class` (`name_class`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; - -CREATE TABLE `ncbi_taxa_node` ( - `taxon_id` int(10) unsigned NOT NULL, - `parent_id` int(10) unsigned NOT NULL, - `rank` char(32) NOT NULL DEFAULT '', - `genbank_hidden_flag` tinyint(1) NOT NULL DEFAULT '0', - `left_index` int(10) NOT NULL DEFAULT '0', - `right_index` int(10) NOT NULL DEFAULT '0', - `root_id` int(10) NOT NULL DEFAULT '1', - PRIMARY KEY (`taxon_id`), - KEY `parent_id` (`parent_id`), - KEY `rank` (`rank`), - KEY `left_index` (`left_index`), - KEY `right_index` (`right_index`) -) ENGINE=InnoDB DEFAULT CHARSET=latin1; - From a498e7a4085b0b41b0b6a95912a7033e84dc6407 Mon Sep 17 00:00:00 2001 From: danielp Date: Thu, 23 Oct 2025 15:57:42 +0100 Subject: [PATCH 07/14] Vastly improved testing Implemented assembly uuid control Genome group support Simplified lookups for data moved to genome table Alias loading Assembly accession loading genebuild.version removed from updater multiple meta of same type allowed Checks for single only meta keys Fix for meta key verification --- .../metadata/api/adaptors/genome.py | 136 +- .../metadata/api/factories/utils.py | 46 +- .../metadata/api/models/assembly.py | 2 +- .../production/metadata/api/models/genome.py | 2 +- 
.../production/metadata/updater/core.py | 654 +++++++--- .../metadata/updater/updater_utils.py | 46 +- src/tests/databases/core_1.db | Bin 28672 -> 28672 bytes src/tests/databases/core_2.db | Bin 28672 -> 28672 bytes src/tests/databases/core_3.db | Bin 28672 -> 28672 bytes src/tests/databases/core_4.db | Bin 28672 -> 28672 bytes src/tests/databases/core_5.db | Bin 28672 -> 28672 bytes src/tests/databases/core_6.db | Bin 28672 -> 28672 bytes src/tests/databases/core_7.db | Bin 28672 -> 28672 bytes src/tests/databases/core_8.db | Bin 28672 -> 28672 bytes src/tests/databases/core_9.db | 0 .../databases/ensembl_genome_metadata.db | Bin 262144 -> 262144 bytes src/tests/test_dataset_factory.py | 6 +- src/tests/test_exports.py | 1101 +++++++++++++++++ src/tests/test_organism_to_organismgroup.py | 90 -- src/tests/test_release_factory.py | 19 + src/tests/test_scripts.py | 330 +++++ src/tests/test_updater.py | 29 +- src/tests/tests_exports.py | 20 - 23 files changed, 2087 insertions(+), 394 deletions(-) delete mode 100644 src/tests/databases/core_9.db create mode 100644 src/tests/test_exports.py delete mode 100644 src/tests/test_organism_to_organismgroup.py create mode 100644 src/tests/test_scripts.py delete mode 100644 src/tests/tests_exports.py diff --git a/src/ensembl/production/metadata/api/adaptors/genome.py b/src/ensembl/production/metadata/api/adaptors/genome.py index cbee9da8..9f305967 100644 --- a/src/ensembl/production/metadata/api/adaptors/genome.py +++ b/src/ensembl/production/metadata/api/adaptors/genome.py @@ -17,7 +17,6 @@ from typing import List, Tuple, NamedTuple import sqlalchemy as db -from ensembl.ncbi_taxonomy.models import NCBITaxaName from ensembl.utils.database import DBConnection from sqlalchemy import select, func, desc, or_, distinct, case from sqlalchemy.exc import NoResultFound @@ -25,9 +24,7 @@ from ensembl.production.metadata.api.adaptors.base import BaseAdaptor, check_parameter, cfg from ensembl.production.metadata.api.exceptions import 
TypeNotFoundException -from ensembl.production.metadata.api.models import Genome, Organism, Assembly, OrganismGroup, OrganismGroupMember, \ - GenomeRelease, EnsemblRelease, EnsemblSite, AssemblySequence, GenomeDataset, Dataset, DatasetType, DatasetSource, \ - ReleaseStatus, DatasetStatus, utils, DatasetAttribute, Attribute +from ensembl.production.metadata.api.models import * logger = logging.getLogger(__name__) @@ -149,10 +146,13 @@ def fetch_genomes_by_assembly_name_genebuild(self, session.expire_on_commit = False return session.execute(genome_select).all() - def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organism_uuid=None, assembly_uuid=None, - assembly_accession=None, assembly_name=None, use_default_assembly=False, biosample_id=None, - production_name=None, taxonomy_id=None, group=None, unreleased_only=False, site_name=None, - release_type=None, release_version=None, current_only=False): + def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organism_uuid=None, + assembly_uuid=None, assembly_accession=None, assembly_name=None, + use_default_assembly=False, biosample_id=None, production_name=None, + taxonomy_id=None, group=None, genome_group_id=None, genome_group_name=None, + genome_group_type=None, + genome_group_reference_only=False, unreleased_only=False, site_name=None, release_type=None, + release_version=None, current_only=False): """ Fetches genome information based on the specified parameters. 
@@ -221,6 +221,32 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ .join(OrganismGroupMember.organism_group) \ .filter(OrganismGroup.name.in_(group) | OrganismGroup.code.in_(group)) + # genome group logic + if genome_group_id or genome_group_name or genome_group_type or genome_group_reference_only: + genome_select = genome_select.join( + GenomeGroupMember, Genome.genome_id == GenomeGroupMember.genome_id + ).join( + GenomeGroup, GenomeGroup.genome_group_id == GenomeGroupMember.genome_group_id + ) + + if genome_group_id: + genome_group_id = check_parameter(genome_group_id) + genome_select = genome_select.where(GenomeGroup.genome_group_id.in_(genome_group_id)) + + if genome_group_name: + genome_group_name = check_parameter(genome_group_name) + genome_select = genome_select.where(GenomeGroup.name.in_(genome_group_name)) + + if genome_group_type: + genome_group_type = check_parameter(genome_group_type) + genome_select = genome_select.where(GenomeGroup.type.in_(genome_group_type)) + + if genome_group_reference_only: + genome_select = genome_select.where(GenomeGroupMember.is_reference == 1) + + if current_only: + genome_select = genome_select.where(GenomeGroupMember.is_current == 1) + # Apply additional filters based on the provided parameters if genome_id is not None: genome_select = genome_select.filter(Genome.genome_id.in_(genome_id)) @@ -870,6 +896,100 @@ def fetch_assemblies_count(self, species_taxonomy_id: int, release_version: floa with self.metadata_db.session_scope() as session: return session.execute(query).scalar() + def fetch_genome_groups(self, genome_id=None, genome_uuid=None, group_type=None, + is_current=True, release_version=None): + """ + Fetch all genome groups that a genome belongs to. + + Note: This is the inverse of filtering by genome_group in fetch_genomes(). 
+ """ + + query = select(GenomeGroup).join( + GenomeGroupMember, GenomeGroup.genome_group_id == GenomeGroupMember.genome_group_id + ).join( + Genome, Genome.genome_id == GenomeGroupMember.genome_id + ) + + if genome_id: + genome_id = check_parameter(genome_id) + query = query.where(Genome.genome_id.in_(genome_id)) + + if genome_uuid: + genome_uuid = check_parameter(genome_uuid) + query = query.where(Genome.genome_uuid.in_(genome_uuid)) + + if group_type: + group_type = check_parameter(group_type) + query = query.where(GenomeGroup.type.in_(group_type)) + + if is_current: + query = query.where(GenomeGroupMember.is_current == 1) + + if release_version is not None: + query = query.join( + EnsemblRelease, + EnsemblRelease.release_id == GenomeGroupMember.release_id + ).where(EnsemblRelease.version <= release_version) + + logger.debug(query) + with self.metadata_db.session_scope() as session: + session.expire_on_commit = False + return session.execute(query).scalars().all() + + def fetch_genome_group_members_detailed(self, genome_group_id=None, group_name=None, + is_current=True, release_version=None): + """ + Fetch genomes and their membership details for a genome group. + + This returns both the genome objects and their membership information (is_reference, etc.) + + Args: + genome_group_id (Union[int, List[int]]): The ID(s) of the genome group(s). + group_name (Union[str, List[str]]): The name(s) of the genome group(s). + is_current (bool): If True, return only current genome group memberships. + release_version (float): Return memberships up to this release version. + + Returns: + List of tuples (Genome, GenomeGroupMember) with full membership details. 
+ """ + member_select = select(Genome, GenomeGroupMember).join( + GenomeGroupMember, Genome.genome_id == GenomeGroupMember.genome_id + ).join( + GenomeGroup, GenomeGroup.genome_group_id == GenomeGroupMember.genome_group_id + ) + + # Apply filters + if genome_group_id: + genome_group_id = check_parameter(genome_group_id) + member_select = member_select.where(GenomeGroup.genome_group_id.in_(genome_group_id)) + + if group_name: + group_name = check_parameter(group_name) + member_select = member_select.where(GenomeGroup.name.in_(group_name)) + + if is_current: + member_select = member_select.where(GenomeGroupMember.is_current == 1) + + # Handle release filtering + if release_version is not None: + member_select = member_select.join( + EnsemblRelease, + EnsemblRelease.release_id == GenomeGroupMember.release_id + ).where(EnsemblRelease.version <= release_version) + + logger.debug(f"Allow Unreleased {cfg.allow_unreleased}") + if not cfg.allow_unreleased: + member_select = member_select.where(EnsemblRelease.status == ReleaseStatus.RELEASED) + + # Order by is_reference descending so reference genomes appear first + member_select = member_select.order_by(desc(GenomeGroupMember.is_reference)) + + logger.debug(member_select) + with self.metadata_db.session_scope() as session: + session.expire_on_commit = False + return session.execute(member_select).all() + + def get_public_path(self, genome_uuid, dataset_type='all', release=None): paths = [] scientific_name = None diff --git a/src/ensembl/production/metadata/api/factories/utils.py b/src/ensembl/production/metadata/api/factories/utils.py index 3ec58aeb..005e3d21 100644 --- a/src/ensembl/production/metadata/api/factories/utils.py +++ b/src/ensembl/production/metadata/api/factories/utils.py @@ -10,10 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from sqlalchemy.orm import aliased - -from ensembl.production.metadata.api.models import Dataset, Genome, GenomeDataset, DatasetAttribute, Attribute, Assembly - +from ensembl.production.metadata.api.models import Genome, Assembly def get_genome_sets_by_assembly_and_provider(session): """ @@ -26,47 +23,24 @@ def get_genome_sets_by_assembly_and_provider(session): DOES NOT HAVE A TEST. NOR DO WE HAVE UPDATES IN OUR TEST DB. BIG WORK TO UPDATE THIS. """ - # Aliases for clarity - genome_alias = aliased(Genome) - dataset_alias = aliased(Dataset) - dataset_attr_provider = aliased(DatasetAttribute) # Attribute for genebuild.provider - dataset_attr_geneset = aliased(DatasetAttribute) # Attribute for genebuild.last_geneset_update - attribute_provider = aliased(Attribute) - attribute_geneset = aliased(Attribute) - assembly_alias = aliased(Assembly) - - # Query to retrieve genome_uuid, assembly_uuid, provider, and last_geneset_update + # Query to retrieve genome_uuid, assembly_uuid, provider_name, and genebuild_date query = ( session.query( - genome_alias.genome_uuid, - assembly_alias.assembly_uuid, - dataset_attr_provider.value.label("provider_name"), - dataset_attr_geneset.value.label("last_geneset_update") - ) - .join(assembly_alias, genome_alias.assembly_id == assembly_alias.assembly_id) - .join(GenomeDataset, GenomeDataset.genome_id == genome_alias.genome_id) - .join(dataset_alias, GenomeDataset.dataset_id == dataset_alias.dataset_id) - # Join for provider attribute - .join(dataset_attr_provider, dataset_attr_provider.dataset_id == dataset_alias.dataset_id) - .join(attribute_provider, dataset_attr_provider.attribute_id == attribute_provider.attribute_id) - # Join for last_geneset_update attribute - .join(dataset_attr_geneset, dataset_attr_geneset.dataset_id == dataset_alias.dataset_id) - .join(attribute_geneset, dataset_attr_geneset.attribute_id == attribute_geneset.attribute_id) - .filter( - dataset_alias.dataset_type.has(name="genebuild"), # Ensure dataset is of type 
genebuild - attribute_provider.name == "genebuild.provider_name", # Ensure attribute is genebuild.provider_name - attribute_geneset.name == "genebuild.last_geneset_update" - # Ensure attribute is genebuild.last_geneset_update + Genome.genome_uuid, + Assembly.assembly_uuid, + Genome.provider_name, + Genome.genebuild_date ) + .join(Assembly, Genome.assembly_id == Assembly.assembly_id) ) # Organize results into a dictionary grouping genome_uuids by (assembly_uuid, provider) genome_sets = {} - for genome_uuid, assembly_uuid, provider, last_geneset_update in query.all(): - key = (assembly_uuid, provider) + for genome_uuid, assembly_uuid, provider_name, genebuild_date in query.all(): + key = (assembly_uuid, provider_name) if key not in genome_sets: genome_sets[key] = [] - genome_sets[key].append((genome_uuid, last_geneset_update)) # Keep last_geneset_update with each genome + genome_sets[key].append((genome_uuid, genebuild_date)) # Create a filtered dictionary where only groups with more than one genome are kept genome_sets_with_multiple = {key: genomes for key, genomes in genome_sets.items() if len(genomes) > 1} diff --git a/src/ensembl/production/metadata/api/models/assembly.py b/src/ensembl/production/metadata/api/models/assembly.py index f1af78d3..c71620f5 100644 --- a/src/ensembl/production/metadata/api/models/assembly.py +++ b/src/ensembl/production/metadata/api/models/assembly.py @@ -26,7 +26,7 @@ class Assembly(LoadAble, Base): assembly_id = Column(Integer, primary_key=True) assembly_uuid = Column(String(40), unique=True, nullable=False, default=lambda: str(uuid.uuid4())) ucsc_name = Column(String(16)) - accession = Column(String(16), nullable=False, unique=True) + accession = Column(String(16), nullable=False) level = Column(String(32), nullable=False) name = Column(String(128), nullable=False) accession_body = Column(String(32)) diff --git a/src/ensembl/production/metadata/api/models/genome.py b/src/ensembl/production/metadata/api/models/genome.py index 
9cf2d9f1..64bdc7f4 100644 --- a/src/ensembl/production/metadata/api/models/genome.py +++ b/src/ensembl/production/metadata/api/models/genome.py @@ -31,9 +31,9 @@ class Genome(LoadAble, Base): assembly_id = Column(ForeignKey("assembly.assembly_id"), nullable=False, index=True) organism_id = Column(ForeignKey("organism.organism_id"), nullable=False, index=True) created = Column(DATETIME(fsp=6), nullable=False) - genebuild_version = Column(String(64), nullable=False, unique=False) production_name = Column(String(120), nullable=False, unique=False) annotation_source = Column(String(120), nullable=False, unique=False) + provider_name = Column(String(120), nullable=False, unique=False) genebuild_date = Column(String(20), nullable=False, unique=False) suppressed = Column(TINYINT(1), nullable=False, default=0) suppression_details = Column(String(255), nullable=True, unique=False) diff --git a/src/ensembl/production/metadata/updater/core.py b/src/ensembl/production/metadata/updater/core.py index 56a42010..0c20ddd2 100644 --- a/src/ensembl/production/metadata/updater/core.py +++ b/src/ensembl/production/metadata/updater/core.py @@ -10,6 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License.` import logging +import re import uuid from collections import defaultdict @@ -27,7 +28,6 @@ from ensembl.production.metadata.api.factories.datasets import DatasetFactory from ensembl.production.metadata.api.models import * from ensembl.production.metadata.updater.base import BaseMetaUpdater -from ensembl.production.metadata.updater.updater_utils import update_attributes logging.basicConfig(level=logging.INFO) @@ -38,13 +38,13 @@ class CoreMetaUpdater(BaseMetaUpdater): def __init__(self, db_uri, metadata_uri, taxonomy_uri, release=None): super().__init__(db_uri, metadata_uri, taxonomy_uri, release) self.db_type = 'core' - # Single query to get all of the metadata information. 
self.meta_dict = {} self._load_meta_dict() self._validate_required_attributes() def _load_meta_dict(self): - """Load metadata into meta_dict from the database.""" + """Load metadata into meta_dict from the database. + Stores all values for each meta_key as a list to handle potential duplicates.""" with self.db.session_scope() as session: results = session.query(Meta).filter(Meta.meta_value.isnot(None), Meta.meta_value.notin_(['', 'Null', 'NULL'])).all() @@ -54,40 +54,84 @@ def _load_meta_dict(self): meta_value = result.meta_value if species_id not in self.meta_dict: self.meta_dict[species_id] = {} - # WARNING! Duplicated meta_keys for a species_id will not error out!. A datacheck is necessary for key values. - self.meta_dict[species_id][meta_key] = meta_value + if meta_key not in self.meta_dict[species_id]: + self.meta_dict[species_id][meta_key] = [] + self.meta_dict[species_id][meta_key].append(meta_value) def _validate_required_attributes(self): """Check if all required attributes are present in the meta_dict for each species.""" - required_attribute_names = [] + # TODO: Move to datacheck with self.metadata_db.session_scope() as session: - # Query the attribute table to get all required attributes required_attributes = session.query(Attribute.name).filter(Attribute.required == 1).all() required_attribute_names = {attr.name for attr in required_attributes} - with self.db.session_scope() as session: - # Check each species_id in meta_dict - missing_attributes = {} - for species_id, meta in self.meta_dict.items(): - missing = required_attribute_names - set(meta.keys()) - if missing: - missing_attributes[species_id] = missing - - if missing_attributes: - exceptions.MissingMetaException( - "Species ID {species_id} is missing required attributes: {missing_attributes}") - - # Basic API for the meta table in the submission database. 
+ missing_attributes = {} + for species_id, meta in self.meta_dict.items(): + if species_id is None: + continue + missing = required_attribute_names - set(meta.keys()) + if missing: + missing_attributes[species_id] = missing + + if missing_attributes: + error_msg = "\n".join([ + f"Species ID {species_id} is missing required attributes: {', '.join(sorted(missing))}" + for species_id, missing in missing_attributes.items() + ]) + raise exceptions.MissingMetaException(error_msg) + def get_meta_single_meta_key(self, species_id, parameter): + """ + Get a single value for a meta_key. + Raises an exception if multiple values exist for the same key. + + Returns: + str or None: The meta value, or None if not found + + Raises: + DuplicateMetaKeyException: If multiple values exist for the key + """ species_meta = self.meta_dict.get(species_id) if species_meta is None: return None - return species_meta.get(parameter) + + values = species_meta.get(parameter) + if values is None: + return None + + if len(values) > 1: + raise exceptions.MetaException( + f"Species {species_id} has {len(values)} values for meta_key '{parameter}': {values}. " + f"A single key is currently required to successfully hand over." + ) + + return values[0] + + def get_meta_all_values(self, species_id, parameter): + """ + Get all values for a meta_key, handling cases with 0, 1, or multiple values. + + Returns: + list: List of all values for the key (empty list if none exist) + """ + species_meta = self.meta_dict.get(species_id) + if species_meta is None: + return [] + + return species_meta.get(parameter, []) def get_meta_list_from_prefix_meta_key(self, species_id, prefix): + """ + Get all meta_keys with a given prefix, including all values. 
+ + Returns: + dict or None: Dictionary of {key: [values]} where values is always a list, + or None if species not found + """ species_meta = self.meta_dict.get(species_id) if species_meta is None: return None + result_dict = {k: v for k, v in species_meta.items() if k.startswith(prefix)} return result_dict @@ -134,7 +178,6 @@ def process_core(self, **kwargs): # Process each species in its own transaction with self.metadata_db.session_scope() as meta_session: self.process_species(species_id, meta_session) - # If we get here without exception, the species was successful successful_species.append((species_id, production_name)) if len(multi_species) > 1: logger.info(f"Successfully processed species {species_id}: {production_name}") @@ -192,7 +235,7 @@ def process_species(self, species_id, meta_session): organism = self.get_or_new_organism(species_id, meta_session) assembly, assembly_dataset, assembly_dataset_attributes, assembly_sequences, dataset_source = self.get_or_new_assembly( species_id, meta_session) - genebuild_dataset, genebuild_dataset_attributes = self.get_or_new_genebuild(species_id, meta_session, + genebuild_dataset, genebuild_dataset_attributes = self._create_genebuild(species_id, meta_session, dataset_source) # Checking for an existing genome uuid: @@ -203,15 +246,18 @@ def process_species(self, species_id, meta_session): if old_genome is not None: raise exceptions.MetadataUpdateException( f"Species {species_id}: Core database contains a genome.genome_uuid which matches an entry in the meta table.") + # TODO: Move to datacheck else: raise exceptions.MetadataUpdateException( f"Species {species_id}: Database contains a Genome.genome_uuid, but corresponding data is not in meta table.") + # TODO: Move to datacheck if self.is_object_new(organism): logger.info(f'Species {species_id}: New organism') - if not self.is_object_new(genebuild_dataset): + if not self.is_object_new(assembly): raise exceptions.MetadataUpdateException( - f"Species {species_id}: New 
organism, but existing assembly accession and/or genebuild version") + f"Species {species_id}: New organism, but existing assembly accession") + # TODO: Move to datacheck , but leave here to be sure new_genome, assembly_genome_dataset, genebuild_genome_dataset = self.new_genome(meta_session, species_id, organism, @@ -220,13 +266,8 @@ def process_species(self, species_id, meta_session): genebuild_dataset) self.concurrent_commit_genome_uuid(meta_session, species_id, new_genome.genome_uuid) - - elif self.is_object_new(assembly): logger.info(f'Species {species_id}: New assembly') - if not self.is_object_new(genebuild_dataset): - raise exceptions.MetadataUpdateException( - f"Species {species_id}: New assembly, but existing genebuild version") new_genome, assembly_genome_dataset, genebuild_genome_dataset = self.new_genome(meta_session, species_id, organism, @@ -237,32 +278,18 @@ def process_species(self, species_id, meta_session): # Create genome and populate the database with assembly and dataset - elif self.is_object_new(genebuild_dataset): - # Check that genest update or provider name has changed from last time. 
- - dataset_attr_alias1 = aliased(DatasetAttribute) - attribute_alias1 = aliased(Attribute) - dataset_attr_alias2 = aliased(DatasetAttribute) - attribute_alias2 = aliased(Attribute) + else: provider_name = self.get_meta_single_meta_key(species_id, "genebuild.provider_name") geneset_update = self.get_meta_single_meta_key(species_id, "genebuild.last_geneset_update") - query = meta_session.query(Assembly).join( - Genome, Assembly.genomes - ).join(GenomeDataset, Genome.genome_datasets - ).join(Dataset, GenomeDataset.dataset - ).join(dataset_attr_alias1, Dataset.dataset_attributes - ).join(attribute_alias1, dataset_attr_alias1.attribute - ).join(dataset_attr_alias2, Dataset.dataset_attributes - ).join(attribute_alias2, dataset_attr_alias2.attribute - ).filter(Assembly.accession == assembly.accession, - Dataset.dataset_type.has(name="genebuild"), - and_( - attribute_alias1.name == "genebuild.provider_name", - dataset_attr_alias1.value == provider_name, - attribute_alias2.name == "genebuild.last_geneset_update", - dataset_attr_alias2.value == geneset_update - ) - ) + + query = meta_session.query(Genome).join( + Assembly, Genome.assembly + ).filter( + Assembly.accession == assembly.accession, + Genome.provider_name == provider_name, + Genome.genebuild_date == geneset_update + ) + if meta_session.query(query.exists()).scalar(): raise exceptions.MetadataUpdateException( "genebuild.provider_name or genebuild.last_geneset_update must be updated.") @@ -277,15 +304,7 @@ def process_species(self, species_id, meta_session): self.concurrent_commit_genome_uuid(meta_session, species_id, new_genome.genome_uuid) - else: - # Check if the data has been released - if check_release_status(self.metadata_db, genebuild_dataset.dataset_uuid): - raise exceptions.WrongReleaseException( - f"Species {species_id}: Existing Organism, Assembly, and Datasets within a release.") - else: - logger.info(f'Species {species_id}: Rewrite of existing datasets attempted') - raise 
exceptions.MetadataUpdateException( - f"Species {species_id}: This looks like a reload of data that hasn't been released.") + def concurrent_commit_genome_uuid(self, meta_session, species_id, genome_uuid): # Currently impossible with myisam without two phase commit (requires full refactor) @@ -316,12 +335,12 @@ def concurrent_commit_genome_uuid(self, meta_session, species_id, genome_uuid): def new_genome(self, meta_session, species_id, organism, assembly, assembly_dataset, genebuild_dataset): production_name = self.get_meta_single_meta_key(species_id, "organism.production_name") - genebuild_version = self.get_meta_single_meta_key(species_id, "genebuild.version") genebuild_date = self.get_meta_single_meta_key(species_id, "genebuild.last_geneset_update") url_name = self.get_meta_single_meta_key(species_id, "assembly.url_name") + provider_name = self.get_meta_single_meta_key(species_id, "genebuild.provider_name") annotation_source = self.get_meta_single_meta_key(species_id, "genebuild.annotation_source") - if genebuild_date is None: ##TODO Make this so any of the above are none it fails! 
- raise exceptions.MetadataUpdateException(f"Unable to parse genebuild.last_geneset_update from meta") + if None in (production_name, genebuild_date, annotation_source, provider_name): + raise exceptions.MetadataUpdateException(f"Unable to find required keys from meta") # get next release inline to attach the genome to planned_release = get_or_new_release(self.metadata_uri) new_genome = Genome( @@ -329,11 +348,11 @@ def new_genome(self, meta_session, species_id, organism, assembly, assembly_data assembly=assembly, organism=organism, genebuild_date=genebuild_date, - genebuild_version=genebuild_version, created=func.now(), production_name=production_name, url_name=url_name, - annotation_source=annotation_source + annotation_source=annotation_source, + provider_name=provider_name ) logger.debug(f"Assigning genome {new_genome.genome_uuid} to {planned_release.version}") meta_session.add(new_genome) @@ -354,6 +373,10 @@ def new_genome(self, meta_session, species_id, organism, assembly, assembly_data is_current=True, ) meta_session.add(genebuild_genome_dataset) + + self._create_genome_group_members(meta_session, species_id, new_genome, planned_release) + + # Homology dataset creation homology_uuid, homology_dataset, homology_dataset_attributes, homology_genome_dataset = self.new_homology( meta_session, species_id, genome=new_genome) @@ -367,6 +390,46 @@ def new_genome(self, meta_session, species_id, organism, assembly, assembly_data return new_genome, assembly_genome_dataset, genebuild_genome_dataset + def _create_genome_group_members(self, meta_session, species_id, new_genome, planned_release): + """ + Add genome to genome groups specified in meta keys. 
+ + Args: + meta_session: The metadata database session + species_id: The species ID from the core database + new_genome: The newly created Genome object + planned_release: The EnsemblRelease object + + Raises: + MetadataUpdateException: If a specified genome group doesn't exist + """ + genome_group_names = self.get_meta_all_values(species_id, "genome.genome_group") + + if not genome_group_names: + return + + for group_name in genome_group_names: + # Check if the genome group exists + genome_group = meta_session.query(GenomeGroup).filter( + GenomeGroup.name == group_name + ).one_or_none() + + if genome_group is None: + raise exceptions.MetadataUpdateException( + f"Genome group '{group_name}' specified in meta key 'genome.genome_group' does not exist in the database" + ) + + # Create GenomeGroupMember + genome_group_member = GenomeGroupMember( + genome=new_genome, + genome_group=genome_group, + ensembl_release=planned_release, + is_current=1, + is_reference=0 + ) + meta_session.add(genome_group_member) + logger.info(f"Added genome {new_genome.genome_uuid} to genome group '{group_name}'") + def get_or_new_organism(self, species_id, meta_session): """ Get an existing Organism instance or create a new one, depending on the information from the metadata database. @@ -438,16 +501,19 @@ def get_or_new_organism(self, species_id, meta_session): def get_assembly_sequences(self, species_id, assembly): """ - Get the assembly sequences and the values that correspond to the metadata table + Get the assembly sequences and aliases from the core DB. + Returns both AssemblySequence and SequenceAlias objects. 
""" assembly_sequences = [] + sequence_aliases = [] + with self.db.session_scope() as session: circular_seq_attrib = aliased(SeqRegionAttrib) results = (session.query(SeqRegion.name, SeqRegion.length, CoordSystem.name.label("coord_system_name"), SeqRegionSynonym.synonym, circular_seq_attrib.value.label("is_circular")) .outerjoin(SeqRegion.coord_system) .outerjoin(SeqRegionSynonym, SeqRegionSynonym.seq_region_id == SeqRegion.seq_region_id) - .join(SeqRegion.seq_region_attrib) # For other attributes + .join(SeqRegion.seq_region_attrib) .outerjoin(circular_seq_attrib, and_(circular_seq_attrib.seq_region_id == SeqRegion.seq_region_id, circular_seq_attrib.attrib_type.has(code="circular_seq"))) @@ -456,73 +522,60 @@ def get_assembly_sequences(self, species_id, assembly): .filter(AttribType.code == "toplevel") .filter(CoordSystem.name != "lrg") .all()) + attributes = (session.query(SeqRegion.name, AttribType.code, SeqRegionAttrib.value) .select_from(SeqRegion) .join(SeqRegionAttrib) .join(AttribType) .filter(or_(AttribType.code == "sequence_location", AttribType.code == "karyotype_rank")).all()) + attribute_dict = {} for name, code, value in attributes: if name not in attribute_dict: attribute_dict[name] = {} attribute_dict[name][code] = value + # Single pass: collect synonyms AND process sequence info + synonym_dict = defaultdict(list) accession_info = defaultdict( - # The None's here are improper, but they break far too much for this update if they are changed. - # When accession is decided I will fix them. - # TODO: Just delete the comment. No one cares about the assembly sequence table. 
lambda: { - "names": set(), "accession": None, "length": None, "location": None, "chromosomal": None, - "karyotype_rank": None + "length": None, "location": None, "chromosomal": None, + "karyotype_rank": None, "type": None, "is_circular": 0 }) + location_mapping = { + 'nuclear_chromosome': 'SO:0000738', + 'mitochondrial_chromosome': 'SO:0000737', + 'chloroplast_chromosome': 'SO:0000745', + 'apicoplast_chromosome': 'SO:0001259', + None: 'SO:0000738', + } + for seq_region_name, seq_region_length, coord_system_name, synonym, is_circular in results: - accession_info[seq_region_name]["names"].add(seq_region_name) if synonym: - accession_info[seq_region_name]["names"].add(synonym) - - # Save the sequence location, length, and chromosomal flag. - location_mapping = { - 'nuclear_chromosome': 'SO:0000738', - 'mitochondrial_chromosome': 'SO:0000737', - 'chloroplast_chromosome': 'SO:0000745', - 'apicoplast_chromosome': 'SO:0001259', - None: 'SO:0000738', - } - # Try to get the sequence location - location = attribute_dict.get(seq_region_name, {}).get("sequence_location", None) - - # Using the retrieved location to get the sequence location - sequence_location = location_mapping[location] - - # Try to get the karyotype rank - karyotype_rank = attribute_dict.get(seq_region_name, {}).get("karyotype_rank", None) + synonym_dict[seq_region_name].append(synonym) - # Test if chromosomal: - if karyotype_rank is not None: - chromosomal = 1 - else: - chromosomal = 1 if coord_system_name == "chromosome" else 0 + if accession_info[seq_region_name]["length"] is None: + location = attribute_dict.get(seq_region_name, {}).get("sequence_location", None) + sequence_location = location_mapping[location] + karyotype_rank = attribute_dict.get(seq_region_name, {}).get("karyotype_rank", None) - # Assign the values to the dictionary - if not accession_info[seq_region_name]["length"]: - accession_info[seq_region_name]["length"] = seq_region_length + chromosomal = 1 if karyotype_rank is not None else 
(1 if coord_system_name == "chromosome" else 0) - if not accession_info[seq_region_name]["location"]: - accession_info[seq_region_name]["location"] = sequence_location + accession_info[seq_region_name].update({ + "length": seq_region_length, + "location": sequence_location, + "chromosomal": chromosomal, + "karyotype_rank": karyotype_rank, + "type": coord_system_name, + "is_circular": 1 if is_circular == "1" else 0 + }) - if accession_info[seq_region_name]["chromosomal"] is None: # Assuming default is None - accession_info[seq_region_name]["chromosomal"] = chromosomal + for seq_region_name, info in accession_info.items(): + # Determine the proper accession + accession = self._get_valid_accession(seq_region_name, synonym_dict.get(seq_region_name, [])) - if not accession_info[seq_region_name]["karyotype_rank"]: - accession_info[seq_region_name]["karyotype_rank"] = karyotype_rank - - accession_info[seq_region_name]["type"] = coord_system_name - accession_info[seq_region_name]["is_circular"] = 1 if is_circular == "1" else 0 - - for accession, info in accession_info.items(): - seq_region_name = accession assembly_sequence = AssemblySequence( name=seq_region_name, assembly=assembly, @@ -531,136 +584,327 @@ def get_assembly_sequences(self, species_id, assembly): length=info["length"], sequence_location=info["location"], chromosome_rank=info["karyotype_rank"], - # md5="", Populated after checksums are ran. - # sha512t4u="", Populated after checksums are ran. 
type=info["type"], is_circular=info["is_circular"] ) - assembly_sequences.append(assembly_sequence) - return assembly_sequences + + # Create SequenceAlias objects for each synonym + for synonym in synonym_dict.get(seq_region_name, []): + sequence_alias = SequenceAlias( + assembly_sequence=assembly_sequence, + alias=synonym, + source="core" + ) + sequence_aliases.append(sequence_alias) + + return assembly_sequences, sequence_aliases + + def _is_valid_ena_accession(self, identifier): + """ + Check if an identifier matches ENA sequence identifier rules for annotated sequences. + + Valid patterns: + - [A-Z]{1}[0-9]{5}.[0-9]+ + - [A-Z]{2}[0-9]{6}.[0-9]+ + - [A-Z]{2}[0-9]{8} + - [A-Z]{4}[0-9]{2}S?[0-9]{6,8} + - [A-Z]{6}[0-9]{2}S?[0-9]{7,9} + + Returns: + bool: True if identifier matches any pattern + """ + patterns = [ + r'^[A-Z]{1}[0-9]{5}\.[0-9]+$', + r'^[A-Z]{2}[0-9]{6}\.[0-9]+$', + r'^[A-Z]{2}[0-9]{8}$', + r'^[A-Z]{4}[0-9]{2}S?[0-9]{6,8}$', + r'^[A-Z]{6}[0-9]{2}S?[0-9]{7,9}$', + ] + + return any(re.match(pattern, identifier) for pattern in patterns) + + def _get_valid_accession(self, seq_region_name, synonyms): + """ + Get a valid ENA accession for a sequence region. + + First checks if the seq_region_name matches ENA rules. + If not, searches through synonyms for the first match. + + Args: + seq_region_name: The sequence region name from core DB + synonyms: List of synonyms for this sequence region + + Returns: + str: Valid ENA accession + + Raises: + MetadataUpdateException: If no valid accession found + """ + if self._is_valid_ena_accession(seq_region_name): + return seq_region_name + + # Search through synonyms for the first valid accession + # TODO: Make this match the assembly report instead of taking first match + for synonym in synonyms: + if self._is_valid_ena_accession(synonym): + return synonym + + raise exceptions.MetadataUpdateException( + f"No sequence accession found that matches ENA identifier rules for sequence '{seq_region_name}'. 
" + f"Checked name and {len(synonyms)} synonym(s): {synonyms}" + ) def get_or_new_assembly(self, species_id, meta_session, source=None): - # Get the new assembly accession from the core handed over + """ + Queries the existing metadata to see if the assembly exists and determines + whether to attach to existing, create new, or return an error. + + Handles multiple assemblies with same accession by comparing sequences. + Excludes assemblies with FAULTY dataset status. + """ + assembly_accession = self.get_meta_single_meta_key(species_id, "assembly.accession") - assembly = meta_session.query(Assembly).filter(Assembly.accession == assembly_accession).one_or_none() + # Query assemblies but exclude those with faulty assembly datasets + assemblies = (meta_session.query(Assembly) + .outerjoin(Genome, Genome.assembly_id == Assembly.assembly_id) + .outerjoin(GenomeDataset, GenomeDataset.genome_id == Genome.genome_id) + .outerjoin(Dataset, Dataset.dataset_id == GenomeDataset.dataset_id) + .outerjoin(DatasetType, Dataset.dataset_type_id == DatasetType.dataset_type_id) + .filter(Assembly.accession == assembly_accession) + .filter(or_( + DatasetType.name != "assembly", + Dataset.status != DatasetStatus.FAULTY + )).distinct().all()) if source is None: dataset_source = self.get_or_new_source(meta_session, "core") else: dataset_source = source - # This should return the existing objects - if assembly is not None: - # Get the existing assembly dataset - assembly_dataset = meta_session.query(Dataset).filter(Dataset.label == assembly_accession).one_or_none() - # I should not need this, but double check on database updating. 
- assembly_dataset_attributes = assembly_dataset.dataset_attributes - assembly_sequences = assembly.assembly_sequences - return assembly, assembly_dataset, assembly_dataset_attributes, assembly_sequences, dataset_source - else: - attributes = self.get_meta_list_from_prefix_meta_key(species_id, "assembly") - is_reference = 1 if self.get_meta_single_meta_key(species_id, "assembly.is_reference") else 0 - with self.db.session_scope() as session: - level = (session.execute(db.select(CoordSystem.name).filter( - CoordSystem.species_id == species_id).order_by(CoordSystem.rank)).all())[0][0] - tol_id = self.get_meta_single_meta_key(species_id, "assembly.tol_id") - accession_body = self.get_meta_single_meta_key(species_id, - "assembly.accession_body") if self.get_meta_single_meta_key( - species_id, "assembly.accession_body") else "INSDC" - assembly = Assembly( - ucsc_name=self.get_meta_single_meta_key(species_id, "assembly.ucsc_alias"), - accession=self.get_meta_single_meta_key(species_id, "assembly.accession"), - level=level, - name=self.get_meta_single_meta_key(species_id, "assembly.name"), - accession_body=accession_body, - assembly_default=self.get_meta_single_meta_key(species_id, "assembly.default"), - tol_id=tol_id, - created=func.now(), - assembly_uuid=str(uuid.uuid4()), - is_reference=is_reference - ) - dataset_factory = DatasetFactory(self.metadata_uri) - dataset_type = meta_session.query(DatasetType).filter(DatasetType.name == "assembly").first() - (dataset_uuid, assembly_dataset, assembly_dataset_attributes, - new_genome_dataset) = dataset_factory.create_dataset(meta_session, None, dataset_source, - dataset_type, attributes, "assembly", - assembly.accession, None, - DatasetStatus.PROCESSED) - meta_session.add(assembly) - meta_session.add(assembly_dataset) - assembly_sequences = self.get_assembly_sequences(species_id, assembly) - meta_session.add_all(assembly_sequences) - - meta_session.add_all(assembly_dataset_attributes) - return assembly, assembly_dataset, 
assembly_dataset_attributes, assembly_sequences, dataset_source - - def get_or_new_genebuild(self, species_id, meta_session, source=None, existing=False): + # Query core DB once upfront - get names and count together + incoming_seq_names = self._get_incoming_sequence_names(species_id) + incoming_count = len(incoming_seq_names) + + # Case 1: New assembly accession - Fresh load + if not assemblies: + return self._create_new_assembly(species_id, meta_session, dataset_source, assembly_accession) + + # Check for force new UUID flag + force_new_uuid = self.get_meta_single_meta_key(species_id, "assembly.create_new_uuid") + + # Find assemblies that match on sequence count and names + matching_assembly = self._find_matching_assembly(assemblies, incoming_seq_names, incoming_count) + + # Case 2: Found exact match - Attach to existing + if matching_assembly is not None: + return self._attach_to_existing_assembly(matching_assembly, meta_session, assembly_accession, + dataset_source) + + # No exact match found - either error or force new + if force_new_uuid == "1" or force_new_uuid == 1: + return self._create_new_assembly(species_id, meta_session, dataset_source, assembly_accession) + + # Return error describing discrepancies + error_details = self._generate_discrepancy_error(assemblies, incoming_seq_names, incoming_count) + raise exceptions.MetadataUpdateException(f"Assembly mismatch: {error_details}") + + def _find_matching_assembly(self, assemblies, incoming_names, incoming_count): """ - Process an individual species from a core database to update the metadata db. - This method contains the logic for updating the metadata - This is not a get, as we don't update the metadata for genebuild, only replace it if it is not released. + Find an assembly that matches both sequence count and names. + Uses pre-fetched incoming data to avoid redundant queries. 
+ + Returns: + Assembly or None: The matching assembly if found, None otherwise + """ + # Filter to assemblies with matching count + count_matches = [a for a in assemblies if len(a.assembly_sequences) == incoming_count] + + if not count_matches: + return None + + # From those, find one with matching names + for assembly in count_matches: + existing_names = {seq.name for seq in assembly.assembly_sequences} + if existing_names == incoming_names: + return assembly + + return None + + def _get_incoming_sequence_names(self, species_id): + """ + Get the names of top-level sequences from the core DB. + Single query to avoid redundancy. + + Returns: + set: Set of sequence names + """ + with self.db.session_scope() as session: + results = (session.query(SeqRegion.name) + .join(SeqRegion.coord_system) + .join(SeqRegion.seq_region_attrib) + .join(SeqRegionAttrib.attrib_type) + .filter(CoordSystem.species_id == species_id) + .filter(AttribType.code == "toplevel") + .filter(CoordSystem.name != "lrg") + .all()) + return {name for (name,) in results} + + def _generate_discrepancy_error(self, assemblies, incoming_names, incoming_count): + """ + Generate a detailed error message describing why no match was found. + """ + count_matching_assemblies = [a for a in assemblies if len(a.assembly_sequences) == incoming_count] + + if not count_matching_assemblies: + # No count matches + assembly_info = [(a.assembly_uuid, len(a.assembly_sequences)) for a in assemblies] + counts_str = ", ".join([f"UUID {uuid}: {count} sequences" for uuid, count in assembly_info]) + return (f"Assembly accession found {len(assemblies)} time(s) in database, " + f"but none match incoming sequence count of {incoming_count}. 
" + f"Existing counts: {counts_str}") + + # Count matches but names don't + error_lines = [ + f"Assembly accession found with matching sequence count ({incoming_count}), " + f"but sequence names do not match.", + f"Incoming names: {sorted(incoming_names)}" + ] + + for assembly in count_matching_assemblies: + existing_names = {seq.name for seq in assembly.assembly_sequences} + missing = incoming_names - existing_names + extra = existing_names - incoming_names + + error_lines.append(f"\nUUID {assembly.assembly_uuid}: {sorted(existing_names)}") + if missing: + error_lines.append(f" Missing in existing: {sorted(missing)}") + if extra: + error_lines.append(f" Extra in existing: {sorted(extra)}") + + return "\n".join(error_lines) + + def _attach_to_existing_assembly(self, assembly, meta_session, assembly_accession, dataset_source): + """Attach to existing assembly when sequences match.""" + # Find the assembly dataset through the relationship path + # Assembly -> Genome -> GenomeDataset -> Dataset + assembly_dataset = (meta_session.query(Dataset) + .join(GenomeDataset, GenomeDataset.dataset_id == Dataset.dataset_id) + .join(Genome, Genome.genome_id == GenomeDataset.genome_id) + .join(DatasetType, Dataset.dataset_type_id == DatasetType.dataset_type_id) + .filter(Genome.assembly_id == assembly.assembly_id) + .filter(DatasetType.name == "assembly") + .filter(Dataset.status != DatasetStatus.FAULTY) + .first()) + + if assembly_dataset is None: + raise exceptions.MetadataUpdateException( + f"Assembly {assembly_accession} exists but no valid (non-faulty) assembly dataset found") + + assembly_dataset_attributes = assembly_dataset.dataset_attributes + assembly_sequences = assembly.assembly_sequences + return assembly, assembly_dataset, assembly_dataset_attributes, assembly_sequences, dataset_source + + def _create_new_assembly(self, species_id, meta_session, dataset_source, assembly_accession): + """Create a new assembly with unique UUID.""" + attributes = 
self.get_meta_list_from_prefix_meta_key(species_id, "assembly") + is_reference = 1 if self.get_meta_single_meta_key(species_id, "assembly.is_reference") else 0 + + with self.db.session_scope() as session: + level = (session.execute(db.select(CoordSystem.name).filter( + CoordSystem.species_id == species_id).order_by(CoordSystem.rank)).all())[0][0] + tol_id = self.get_meta_single_meta_key(species_id, "assembly.tol_id") + accession_body = self.get_meta_single_meta_key(species_id, + "assembly.accession_body") if self.get_meta_single_meta_key( + species_id, "assembly.accession_body") else "INSDC" + + assembly = Assembly( + ucsc_name=self.get_meta_single_meta_key(species_id, "assembly.ucsc_alias"), + accession=assembly_accession, + level=level, + name=self.get_meta_single_meta_key(species_id, "assembly.name"), + accession_body=accession_body, + assembly_default=self.get_meta_single_meta_key(species_id, "assembly.default"), + tol_id=tol_id, + created=func.now(), + assembly_uuid=str(uuid.uuid4()), + is_reference=is_reference + ) + + dataset_factory = DatasetFactory(self.metadata_uri) + dataset_type = meta_session.query(DatasetType).filter(DatasetType.name == "assembly").first() + (dataset_uuid, assembly_dataset, assembly_dataset_attributes, + new_genome_dataset) = dataset_factory.create_dataset(meta_session, None, dataset_source, + dataset_type, attributes, "assembly", + assembly.accession, None, + DatasetStatus.PROCESSED) + + meta_session.add(assembly) + meta_session.add(assembly_dataset) + + # Get assembly sequences AND aliases + assembly_sequences, sequence_aliases = self.get_assembly_sequences(species_id, assembly) + + meta_session.add_all(assembly_sequences) + meta_session.add_all(sequence_aliases) + meta_session.add_all(assembly_dataset_attributes) + + return assembly, assembly_dataset, assembly_dataset_attributes, assembly_sequences, dataset_source + + def _create_genebuild(self, species_id, meta_session, source=None): + """ + Create a new genebuild dataset for a 
species from a core database. + This method always creates a new dataset - if a matching genome already exists, it throws an exception. + The uniqueness is enforced at the Genome level (assembly + provider + genebuild_date). """ assembly_accession = self.get_meta_single_meta_key(species_id, "assembly.accession") - genebuild_version = self.get_meta_single_meta_key(species_id, "genebuild.version") provider_name = self.get_meta_single_meta_key(species_id, "genebuild.provider_name") last_geneset_update = self.get_meta_single_meta_key(species_id, "genebuild.last_geneset_update") - provider_name_attr = aliased(DatasetAttribute, name="provider_name_attr") - last_geneset_update_attr = aliased(DatasetAttribute, name="last_geneset_update_attr") - - # Query for an existing combination + # Query for an existing combination - this is our uniqueness check + # If this exists, we should NOT create a new one existing_combination = ( meta_session.query(Genome.genome_id) - .join(GenomeDataset, Genome.genome_id == GenomeDataset.genome_id) - .join(Dataset, GenomeDataset.dataset_id == Dataset.dataset_id) .join(Assembly, Genome.assembly_id == Assembly.assembly_id) - .join(provider_name_attr, Dataset.dataset_id == provider_name_attr.dataset_id) - .join(last_geneset_update_attr, Dataset.dataset_id == last_geneset_update_attr.dataset_id) .filter( - Dataset.name == "genebuild", Assembly.accession == assembly_accession, - provider_name_attr.value == provider_name, - last_geneset_update_attr.value == last_geneset_update, - provider_name_attr.attribute.has(Attribute.name == "genebuild.provider_name"), - last_geneset_update_attr.attribute.has(Attribute.name == "genebuild.last_geneset_update"), + Genome.provider_name == provider_name, + Genome.genebuild_date == last_geneset_update, ) ) test_for_existing = meta_session.query(existing_combination.exists()).scalar() - # Check if the combination exists if test_for_existing: raise exceptions.MetaException( - "genebuild.provider_name, 
genebuild.last_geneset_update, and assembly.accession cannot match existing records." + f"Genebuild already exists for assembly {assembly_accession} " + f"with provider '{provider_name}' and date '{last_geneset_update}'. " + "Cannot create duplicate genebuild." ) - # The genebuild accession is formed by combining the assembly accession and the genebuild version - genebuild_accession = assembly_accession + "_" + genebuild_version + # Create a label for the dataset - this is just for human readability + # Old labels stay untouched; new ones use a descriptive format + genebuild_label = f"{assembly_accession}_{provider_name}_{last_geneset_update}" + if source is None: dataset_source = self.get_or_new_source(meta_session, "core") else: dataset_source = source dataset_type = meta_session.query(DatasetType).filter(DatasetType.name == "genebuild").first() - test_status = meta_session.query(Dataset).filter(Dataset.label == genebuild_accession).one_or_none() - # Return existing data if no update is required - if test_status is not None and existing is False: - genebuild_dataset = test_status - genebuild_dataset_attributes = genebuild_dataset.dataset_attributes - return genebuild_dataset, genebuild_dataset_attributes + # Get all genebuild attributes from the core database attributes = self.get_meta_list_from_prefix_meta_key(species_id, "genebuild.") - if existing is False: - dataset_factory = DatasetFactory(self.metadata_uri) - (dataset_uuid, genebuild_dataset, genebuild_dataset_attributes, - new_genome_dataset) = dataset_factory.create_dataset(meta_session, None, dataset_source, - dataset_type, attributes, "genebuild", - genebuild_accession, genebuild_version) - else: - genebuild_dataset = existing - genebuild_dataset.label = genebuild_accession - genebuild_dataset.dataset_source = dataset_source - genebuild_dataset.version = genebuild_version - genebuild_dataset_attributes = update_attributes(genebuild_dataset, attributes, meta_session, replace=True) + + # Use 
genebuild_date as the version (more meaningful than arbitrary version numbers) + dataset_version = last_geneset_update + + # Create new dataset + dataset_factory = DatasetFactory(self.metadata_uri) + (dataset_uuid, genebuild_dataset, genebuild_dataset_attributes, + new_genome_dataset) = dataset_factory.create_dataset( + meta_session, None, dataset_source, + dataset_type, attributes, "genebuild", + genebuild_label, dataset_version + ) return genebuild_dataset, genebuild_dataset_attributes diff --git a/src/ensembl/production/metadata/updater/updater_utils.py b/src/ensembl/production/metadata/updater/updater_utils.py index 1dd5145c..e5e59202 100644 --- a/src/ensembl/production/metadata/updater/updater_utils.py +++ b/src/ensembl/production/metadata/updater/updater_utils.py @@ -14,21 +14,43 @@ def update_attributes(dataset, attributes, session, replace=False): - # TODO If attributes already exist, update them. Add option to replace all. + """ + Update or create dataset attributes. + + Args: + dataset: The dataset object to attach attributes to + attributes: Dictionary of {attribute_name: value} where value can be: + - A single value: "GRCh38" + - A list of values: ["hg38", "Human"] + session: Database session + replace: If True, delete existing attributes before adding new ones + + Returns: + list: List of created DatasetAttribute objects + """ dataset_attributes = [] + if replace: for dataset_attribute in dataset.dataset_attributes: session.delete(dataset_attribute) - session.flush() - for attribute, value in attributes.items(): - meta_attribute = session.query(Attribute).filter(Attribute.name == attribute).one_or_none() + session.flush() + + for attribute_name, attribute_value in attributes.items(): + meta_attribute = session.query(Attribute).filter(Attribute.name == attribute_name).one_or_none() if meta_attribute is None: - raise UpdaterException(f"{attribute} does not exist. 
Add it to the database and reload.") - new_dataset_attribute = DatasetAttribute( - value=value, - dataset=dataset, - attribute=meta_attribute, - ) - session.add(new_dataset_attribute) - dataset_attributes.append(new_dataset_attribute) + raise UpdaterException(f"{attribute_name} does not exist. Add it to the database and reload.") + + # Normalize to list format + values = attribute_value if isinstance(attribute_value, list) else [attribute_value] + + # Create a DatasetAttribute for each value + for value in values: + new_dataset_attribute = DatasetAttribute( + value=value, + dataset=dataset, + attribute=meta_attribute, + ) + session.add(new_dataset_attribute) + dataset_attributes.append(new_dataset_attribute) + return dataset_attributes \ No newline at end of file diff --git a/src/tests/databases/core_1.db b/src/tests/databases/core_1.db index 42f1fdf58fde2efc30fe0d46126aab53293d5a56..8b0c48ba592a136783c0d8b5a7a71089228f99ba 100644 GIT binary patch delta 144 zcmZp8z}WDBae_1>|3n#QR(=M(vd)bu3-p<#cp@gV8yGY3L~Irmkm1qMl4WJ!wA4;d z%}Y%x&CE&BD=5k@%S=fviZ3n7DK9VAOU)}z%}vVD%P&gb++yP)Z0P7{Xk=_+YNlr_ gnU^KV#LA$^$pjZOVwxqy2ohq33mI(8tJ0IuaGy#N3J delta 96 zcmZp8z}WDBae_1>??f4AR$d0ZaIcLi3-p;Kd0Hm38yIgE6!7BN++yP)Y#8Di9AX?_ qoLcBCnU^KV#LA$^$pjKJ0*N^>%@Se+2{D6&3_(JUoB#SVD*yl!iyBV= diff --git a/src/tests/databases/core_2.db b/src/tests/databases/core_2.db index 6c02fa8788dc4aa0dd2bc13bd599d46698eb8070..edd193942d2a43247ac003ff4f13db4ea392b2e5 100644 GIT binary patch delta 194 zcmZp8z}WDBae_1>|3n#QR(=M(vd)bu3-p<#dE6(n8<;ZjxNjB|;N;QKl3``wwA4;d z%}Y%x&CE&BD=5k@%S=fviZ3n7DK9VAOU)}z%}vVD%P&gbTx`?F%*%g*fti0H1OGz) zt^7AP3o1??f4AR$d0ZaIcLi3-p<#c#0>p8<=hu6fo!6Tx`?F%*(*Qz|4Pw zf&VT4ZT=IR1r@gNPYhtw6l7*)kmqC$aSaYJjxSCvl+4Q#U;+s-frN}eLQJy+8Ce-N JfAr^90045a9I*fZ diff --git a/src/tests/databases/core_3.db b/src/tests/databases/core_3.db index 859ee74bf06ee718b4bf2fd8e9b8bef8e5aa3c60..47a0c92eb01bd4ab3f562bde7d46497b98a1f68c 100644 GIT binary patch delta 179 
zcmZp8z}WDBae_3Xz(g5mRsjaRvd)bu3-p;Kc&aC}8yGNFZx$4A=3&>8VP)X7)Sk>} zFDz7Alv7?_u9uouoSK`IqnBTlzInM#6f-aX2?l2Vg$(=)`M2`l+$^XtnLj{?k(EJ_ zliAVH(8$=t)J)HiOGitP2_(h@7c*j-#s8K;P@a=H#5Fj?IKDWwP*R8)RiCkBUKRrb LgYf2${`?96Y2hm< delta 112 zcmZp8z}WDBae_1>-$WT_Rz3#3aIcLi3-p=Ad6rLRH!xsazFANppJ(%Onp1_lOZ z{u2!RZ~1TYpV%y@u!VnO0Gp;DGb@8UCv%8vaENhyacZGtUX}n8NQem}WCRjonkC4{ L%CPyPKfeM1*>N4e diff --git a/src/tests/databases/core_4.db b/src/tests/databases/core_4.db index 76f7091b2c9956089dd4f7fb4f25e80f72ef30fc..2742c71aaa675a679eb77da6b5b48e154eacd8c5 100644 GIT binary patch delta 194 zcmZp8z}WDBae_1>|3n#QR(=M(vd)bu3-p;Kc{(Pu8yGV2bZizB@Z-_Zl3``wwA4;d z%}Y%x&CE&BD=5k@%S=fviZ3n7DK9VAOU)}z%}vVD%P&gbyv4?snV0_r12g|Z2L6To zTlsHp7F3wbA0WiY%Am-}?C5A{WNcz;rf0~dqb0}$5@Ujk88OY`f6E{!&&eF(8XRIA aUz}PfDa4Gb&sZ`qi-Cbbc=Jbpegy#QG%_v# delta 110 zcmZp8z}WDBae_1>??f4AR$d0ZaIcLi3-p;Kc(zPtH!$2RC{V+*d5euNGcN-J12g{# z2L8AFxA{+O7F5{6KQVw!Q;?aJL7tO2#5Fj?IKDWwP%|3n#QR(=M(vd)bu3-p=Acs@*KH_&F{`LJ0~U=NRumJBNcr=@m! zYF=tmX=YA}UO`cQS!POVQG97pPI-B`UTR)(YHm`FUVc&fW*OV3OuYOj7?}ANGVm|t z-^zb;v!KFc{s19HRt7~*W=BUuBV!X&Gd)8t9W6m7kQftO%!p|g|62w@c~0gK*WeK2 b_~O(;Ng-xbea4b`Squye!ka((^D6)Vtz9!? 
delta 110 zcmZp8z}WDBae_1>??f4AR$d0ZaIcLi3-p;qd1WTE8)$D96nMq6S;qD$6E6b;12g{# z2L8AFxA{+O7F5{6KQVw!Q;?aJL7tO2#5Fj?IKDWwP%|3n#QR(=M(vd)bu3-p<#c{C=o8<;ZjXlxb~c*U)wCBw?VX{nu_ znwOeXnwgWLS5TB+mYI@T6kl4DQ(j)Kmzr0cnwylPmtT~=ImBiyGcW%M24?<+4EziE zxANcIET}M|CT{eo|8GmH8{jL azBsi|QivH@< delta 110 zcmZp8z}WDBae_1>??f4AR$d0ZaIcLi3-p<#ctR$#8<=hu6p-NA9AdMUnU{fqftmjV z1OHq8+x#as3o2~kpBTWVDag#qAkWDh;u;)c9ABJTD4CZfzyuOv0tp#`gqUUtGO{vk J{^-xI003VT9CH8w diff --git a/src/tests/databases/core_7.db b/src/tests/databases/core_7.db index 531c17f31cac65c826ae92408f58ae9beaecd100..cad377bef894b68d3e3a12c9f5f7f3eee5dfd2b8 100644 GIT binary patch delta 188 zcmZp8z}WDBae_1>|3n#QR(=M(vd)bu3-p<#d2}YT8<;Zj=xi1gc*m`yCCAFZX{nu_ znwOeXnwgWLS5TB+mYI@T6kl4DQ(j)Kmzr0cnwylPmtT~=Il^WoGcW%$24?>K4E%5S zFZ1u;EU2)Q-%Em-l|h%2Im9(M#8?Q(R^()MbTl+FHZe8RGn35A5@Z62F~P-5nPxFC TFtF)z3NfN8Hrf2wpIHF_zSuA3 delta 140 zcmZp8z}WDBae_1>??f4AR$d0ZaIcLi3-p<#cp@gV8<=hu6p-QB9AUGPnU{fqfti0Z z1OGGrv;3Pk3o0z-_mW^{Wzgkh4si_*F^(@zEsQTMNJ%V7mCVZ$X9CGEfn(8tJ0DiM4kN^Mx diff --git a/src/tests/databases/core_8.db b/src/tests/databases/core_8.db index 2335d721dab4160bdbd2bf70d793f81c91ed1caf..b2d77e66edc9c67c875722079c309edfacce8d88 100644 GIT binary patch delta 144 zcmZp8z}WDBae_1>|3n#QR(=M(vd)bu3-p<#c)};M8yGY3gl`rUkmk|Rl4oV$wA4;d z%}Y%x&CE&BD=5k@%S=fviZ3n7DK9VAOU)}z%}vVD%P&gb+-&11Z0P7{Xk=_+YNlr_ gnU^KV#LA$^$pjZOVwxqy2ohq33mI(8tJ0Ip#syZ`_I delta 96 zcmZp8z}WDBae_1>??f4AR$d0ZaIcLi3-p;Kd73A)8yIgE6!7HP+-&11Y#8Di9AX?_ qoLcBCnU^KV#LA$^$pjKJ0*N^>%@Se+2{D6&3_(JUoB#SVD*yl!1sY8N diff --git a/src/tests/databases/core_9.db b/src/tests/databases/core_9.db deleted file mode 100644 index e69de29b..00000000 diff --git a/src/tests/databases/ensembl_genome_metadata.db b/src/tests/databases/ensembl_genome_metadata.db index 93d6be945a0dc8de094373425f819a4738bac851..ca2bee236ce39ca411e07fe029d6d6530d638152 100644 GIT binary patch delta 1231 zcmZo@5NK!+m>?~v#lXPe0K_mLK2gV*QEOwu!t>m0f($GSbqt*940V%vE=aSm2{IQ? 
zHWX0h05ZIQ*bT^0<7$)SVHcN_WbEOcEPd_wWQXhPI0}mL%Q90^izdIlCe5wj;_BuY z>Jy@%u0B2AlTl(ihZm#dX4xBDjGJd&E@xulR$vy}Y$zbayjlI`EjE4ja0U@pF9xPyQrGKh;#mV6zkX=Gq%p=)50oS&OpnwMEp$&gu|UYs1@ z9_&=d3Q_GWEzQWlAU!$NRAut=cjl@_21Z7@2Ii@G#i_YTISj54x{d`^b)vb-WR>^k zY7o^HXsR0p7$M4|K~9kso?Q3dj){?R@}BqV3J@1rB3zVMoNH=fQfB1p=g576AqD7_ z1c~}aACLh|j8NBELc!#BCNh&fo>{1)*$cJ5k*U!WtdGeSWU;jHWXKX>SyVN)8q1ZEd{qq9y}?drvXqtuMx1P- zxv{aSsX@|Y!{>UFZ#`4e2Zkju>TjCBo+VW}y-C^^FzmbNP7A?mfE>W>?$OkV#+aJu#_Mot@z z;Eep-{P^O;f=r<1cw=**JAo!yqL>6r({;kAZsdD|%Z+tB5S8Z8WZeBG6qK6GfgylM zO~CYPWNrpdzjd6bCiA?-X)-&i%FegZV%Q^R(+Be-j2zQFw=gQTM{QvQVy5j;TbP%5 F0|0s3P+kB4 delta 1119 zcmZo@5NK!+m>?~v$iTqh0K_mLG*QQxQE_9!!t>nR3JfgFvluwl*#sx^T#%mZAaI70 z1Eh{yfoaiZLxFirn`d1vXX0uT=3y6?lw|B-oZNBYH+xxXQE_H|-sFonB_~&2+%f&D zJ0t(*f=gPAo7HdLV$%oe7hw}&;Lqdx$XCMqowthTFHa5kf9@7;F0M&jqMS20r8(wu z$g$67mtdRBCbC&j;R&liO`{JlL#QJYqqDfQG|&x`Pd|^*cJ&K3urx9-GSW4$NX;uw z%}vT_WNh>XD`c`1mzL#VkWDl=A2qe8%Hl0lCjWYAflX<3 zGDs;SJY*i2t4ywbWsX&8464%WucEOki$qm6?+sREWnmCys`8*Hk(?~|)AR6?+0V_T^@@b$ZB47DGyPt4K@C_ zp~~dir~K0w+ychp<+p+`y9|Lzzz}GcC9+-S)xxNL;D3kJ5A{3{W#(Wf$xiNn7YdJS zbL6;AEY3ADH!Cx$=0r31Jyvt8*-@4C<50$isx169cGFllC2(pAn5HK`MElkRc dHhnNZ!pJ$@a|@$Vd(;+2AZFSgwS{?^Hvoy%G4=oe diff --git a/src/tests/test_dataset_factory.py b/src/tests/test_dataset_factory.py index 2673b6b6..856f5bd9 100644 --- a/src/tests/test_dataset_factory.py +++ b/src/tests/test_dataset_factory.py @@ -101,13 +101,13 @@ def test_genebuild_workflow(self, test_dbs, dataset_factory): genebuild_uuid = 'a3352834-cea1-40aa-9dad-99981620c36b' # Test children creation with metadata_db.test_session_scope() as session: - genome = Genome(genebuild_version="1.0", - production_name="new_grch37", + genome = Genome(production_name="new_grch37", assembly_id=40, created=func.now(), organism_id=9, annotation_source="test", - genebuild_date="2026-04" + genebuild_date="2026-04", + provider_name="test" ) session.add(genome) genebuild = Dataset( diff --git a/src/tests/test_exports.py b/src/tests/test_exports.py 
new file mode 100644 index 00000000..2abcbe2f --- /dev/null +++ b/src/tests/test_exports.py @@ -0,0 +1,1101 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import csv +import json +from pathlib import Path + +import pytest + +from ensembl.production.metadata.api.exports.changelog_generator import ChangelogGenerator +from ensembl.production.metadata.api.exports.ftp_index import FTPMetadataExporter +from ensembl.production.metadata.api.exports.stats_generator import StatsGenerator +from ensembl.production.metadata.api.models import Genome, ReleaseStatus, EnsemblRelease + +db_directory = Path(__file__).parent / 'databases' +db_directory = db_directory.resolve() + + +@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / "databases/ensembl_genome_metadata"}, + {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, + ]], indirect=True) +class TestStatsGenerator: + """Test suite for StatsGenerator class.""" + + def test_init_valid_uri(self, test_dbs): + """Test StatsGenerator initialization with valid metadata URI.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + assert generator.metadata_db is not None + assert generator.output_path == Path.cwd() + + def test_init_with_output_path(self, test_dbs, tmp_path): + """Test StatsGenerator initialization with custom output path.""" + metadata_uri 
= test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "test_output" + generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + assert generator.metadata_db is not None + assert generator.output_path == output_path + assert output_path.exists() + + def test_init_invalid_uri_empty(self, test_dbs): + """Test StatsGenerator initialization fails with empty URI.""" + with pytest.raises(ValueError) as excinfo: + StatsGenerator("") + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_uri_none(self, test_dbs): + """Test StatsGenerator initialization fails with None URI.""" + with pytest.raises(ValueError) as excinfo: + StatsGenerator(None) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_uri_not_string(self, test_dbs): + """Test StatsGenerator initialization fails with non-string URI.""" + with pytest.raises(ValueError) as excinfo: + StatsGenerator(123) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_get_partial_data(self, test_dbs): + """Test get_partial_data returns correct structure and values.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + partial_data = generator.get_partial_data() + assert isinstance(partial_data, list) + if len(partial_data) > 0: + first_release = partial_data[0] + required_keys = [ + 'release', 'new_genomes', 'total_genomes', + 'new_assemblies', 'total_assemblies', + 'new_variation_datasets', 'total_variation_datasets', + 'new_regulation_datasets', 'total_regulation_datasets' + ] + for key in required_keys: + assert key in first_release, f"Missing key: {key}" + assert isinstance(first_release['release'], str) + assert isinstance(first_release['new_genomes'], int) + assert isinstance(first_release['total_genomes'], int) + assert isinstance(first_release['new_assemblies'], int) + assert 
isinstance(first_release['total_assemblies'], int) + assert isinstance(first_release['new_variation_datasets'], int) + assert isinstance(first_release['total_variation_datasets'], int) + assert isinstance(first_release['new_regulation_datasets'], int) + assert isinstance(first_release['total_regulation_datasets'], int) + # Verify cumulative totals are non-decreasing + for i in range(1, len(partial_data)): + assert partial_data[i]['total_genomes'] >= partial_data[i - 1]['total_genomes'] + assert partial_data[i]['total_assemblies'] >= partial_data[i - 1]['total_assemblies'] + assert partial_data[i]['total_variation_datasets'] >= partial_data[i - 1]['total_variation_datasets'] + assert partial_data[i]['total_regulation_datasets'] >= partial_data[i - 1]['total_regulation_datasets'] + + assert len(partial_data) == 2 + assert partial_data[0]['release'] == '2020-10-18' + assert partial_data[0]['new_genomes'] == 3 + + def test_get_partial_data_specific_values(self, test_dbs): + """Test get_partial_data returns specific expected values from test database.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + partial_data = generator.get_partial_data() + + assert len(partial_data) == 2 + if len(partial_data) >= 1: + assert partial_data[0]['release'] == '2020-10-18' + assert partial_data[0]['new_genomes'] == 3 + assert partial_data[0]['total_genomes'] == 3 + + def test_get_integrated_data(self, test_dbs): + """Test get_integrated_data returns correct structure and values.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + integrated_data = generator.get_integrated_data() + + assert isinstance(integrated_data, list) + + # Don't actually have any integrated data in the test db. + # TODO: Add some integrated data. 
+ if len(integrated_data) > 0: + first_release = integrated_data[0] + required_keys = [ + 'release', 'genomes', 'assemblies', + 'variation_datasets', 'regulation_datasets' + ] + for key in required_keys: + assert key in first_release, f"Missing key: {key}" + assert isinstance(first_release['release'], str) + assert isinstance(first_release['genomes'], int) + assert isinstance(first_release['assemblies'], int) + assert isinstance(first_release['variation_datasets'], int) + assert isinstance(first_release['regulation_datasets'], int) + + assert len(integrated_data) == 0 + assert integrated_data[0]['release'] == '2025-01' + assert integrated_data[0]['genomes'] == 12 + + # def test_get_integrated_data_specific_values(self, test_dbs): + # """Test get_integrated_data returns specific expected values from test database.""" + # metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + # generator = StatsGenerator(metadata_uri) + # + # integrated_data = generator.get_integrated_data() + # + # assert len(integrated_data) == 2 + # if len(integrated_data) >= 1: + # assert integrated_data[0]['release'] == '112' + # assert integrated_data[0]['genomes'] == 50 + # assert integrated_data[0]['assemblies'] == 45 + + def test_count_datasets(self, test_dbs): + """Test _count_datasets returns correct count for a specific release and dataset type.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + with generator.metadata_db.session_scope() as session: + release_id = 1 + variation_count = generator._count_datasets(session, release_id, 'variation') + assert variation_count == 3 + + regulation_count = generator._count_datasets(session, release_id, 'regulatory_features') + assert regulation_count == 0 + pass + + def test_count_and_get_dataset_ids(self, test_dbs): + """Test _count_and_get_dataset_ids returns correct count and IDs.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = 
StatsGenerator(metadata_uri) + + with generator.metadata_db.session_scope() as session: + release_id = 1 + count, dataset_ids = generator._count_and_get_dataset_ids( + session, release_id, 'variation' + ) + + assert isinstance(count, int) + assert isinstance(dataset_ids, set) + assert count == len(dataset_ids) + assert count == 3 + pass + + def test_export_to_csv(self, test_dbs, tmp_path): + """Test export_to_csv creates files with correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "csv_output" + generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + + # Create sample data + partial_data = [ + { + 'release': 'R1', + 'new_genomes': 10, + 'total_genomes': 10, + 'new_assemblies': 8, + 'total_assemblies': 8, + 'new_variation_datasets': 5, + 'total_variation_datasets': 5, + 'new_regulation_datasets': 3, + 'total_regulation_datasets': 3, + } + ] + + integrated_data = [ + { + 'release': 'R1', + 'genomes': 10, + 'assemblies': 8, + 'variation_datasets': 5, + 'regulation_datasets': 3, + } + ] + + generator.export_to_csv(partial_data, integrated_data) + + partial_file = output_path / 'stats.partial.csv' + integrated_file = output_path / 'stats.integrated.csv' + assert partial_file.exists() + assert integrated_file.exists() + with open(partial_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 1 + assert rows[0]['release'] == 'R1' + assert rows[0]['new_genomes'] == '10' + assert rows[0]['total_genomes'] == '10' + with open(integrated_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 1 + assert rows[0]['release'] == 'R1' + assert rows[0]['genomes'] == '10' + assert rows[0]['assemblies'] == '8' + + def test_export_to_csv_sorting(self, test_dbs, tmp_path): + """Test export_to_csv sorts data by release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "csv_output_sorted" + 
generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + partial_data = [ + {'release': 'R3', 'new_genomes': 30, 'total_genomes': 60, + 'new_assemblies': 20, 'total_assemblies': 50, + 'new_variation_datasets': 10, 'total_variation_datasets': 30, + 'new_regulation_datasets': 5, 'total_regulation_datasets': 15}, + {'release': 'R1', 'new_genomes': 10, 'total_genomes': 10, + 'new_assemblies': 8, 'total_assemblies': 8, + 'new_variation_datasets': 5, 'total_variation_datasets': 5, + 'new_regulation_datasets': 3, 'total_regulation_datasets': 3}, + {'release': 'R2', 'new_genomes': 20, 'total_genomes': 30, + 'new_assemblies': 12, 'total_assemblies': 20, + 'new_variation_datasets': 5, 'total_variation_datasets': 10, + 'new_regulation_datasets': 2, 'total_regulation_datasets': 5}, + ] + + generator.export_to_csv(partial_data, []) + partial_file = output_path / 'stats.partial.csv' + assert partial_file.exists() + + with open(partial_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 3 + assert rows[0]['release'] == 'R1' + assert rows[1]['release'] == 'R2' + assert rows[2]['release'] == 'R3' + + def test_export_to_csv_empty_data(self, test_dbs, tmp_path): + """Test export_to_csv handles empty data correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "csv_output_empty" + generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + generator.export_to_csv([], []) + + partial_file = output_path / 'stats.partial.csv' + integrated_file = output_path / 'stats.integrated.csv' + + assert partial_file.exists() + assert integrated_file.exists() + with open(partial_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 0 + + with open(integrated_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 0 + + def test_generate_integration(self, test_dbs, tmp_path): + """Test generate method integrates all 
components correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "generate_output" + generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + generator.generate() + partial_file = output_path / 'stats.partial.csv' + integrated_file = output_path / 'stats.integrated.csv' + + assert partial_file.exists() + assert integrated_file.exists() + with open(partial_file, 'r') as f: + reader = csv.DictReader(f) + assert reader.fieldnames is not None + partial_fieldnames = [ + 'release', 'new_genomes', 'total_genomes', + 'new_assemblies', 'total_assemblies', + 'new_variation_datasets', 'total_variation_datasets', + 'new_regulation_datasets', 'total_regulation_datasets' + ] + assert reader.fieldnames == partial_fieldnames + + with open(integrated_file, 'r') as f: + reader = csv.DictReader(f) + assert reader.fieldnames is not None + integrated_fieldnames = [ + 'release', 'genomes', 'assemblies', + 'variation_datasets', 'regulation_datasets' + ] + assert reader.fieldnames == integrated_fieldnames + + def test_partial_data_ordering(self, test_dbs): + """Test that partial data is returned in correct order by release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + partial_data = generator.get_partial_data() + if len(partial_data) > 1: + release_labels = [item['release'] for item in partial_data] + assert release_labels == sorted(release_labels) + + def test_integrated_data_ordering(self, test_dbs): + """Test that integrated data is returned in correct order by release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + integrated_data = generator.get_integrated_data() + if len(integrated_data) > 1: + release_labels = [item['release'] for item in integrated_data] + assert release_labels == sorted(release_labels) + + +@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / 
"databases/ensembl_genome_metadata"}, + {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, + ]], indirect=True) +class TestFTPMetadataExporter: + """Test suite for FTPMetadataExporter class.""" + + def test_init_valid_uri(self, test_dbs): + """Test FTPMetadataExporter initialization with valid metadata URI.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + assert exporter.metadata_db is not None + + def test_export_to_json_returns_dict(self, test_dbs): + """Test export_to_json returns dictionary when no output file specified.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + result = exporter.export_to_json() + assert isinstance(result, dict) + assert 'last_updated' in result + assert 'species' in result + assert isinstance(result['species'], dict) + + def test_export_to_json_creates_file(self, test_dbs, tmp_path): + """Test export_to_json creates file when output_file is specified.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + output_file = tmp_path / "ftp_metadata.json" + result = exporter.export_to_json(str(output_file)) + assert result is None + assert output_file.exists() + with open(output_file, 'r') as f: + data = json.load(f) + assert 'last_updated' in data + assert 'species' in data + + def test_build_ftp_metadata_json_structure(self, test_dbs): + """Test build_ftp_metadata_json returns correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + metadata = exporter.build_ftp_metadata_json() + assert isinstance(metadata, dict) + assert 'last_updated' in metadata + assert 'species' in metadata + assert isinstance(metadata['species'], dict) + first_species = next(iter(metadata['species'].values())) + assert 'assemblies' in first_species + assert isinstance(first_species['assemblies'], dict) 
+ + def test_load_all_genome_data(self, test_dbs): + """Test _load_all_genome_data returns correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + with exporter.metadata_db.session_scope() as session: + genome_data = exporter._load_all_genome_data(session) + assert isinstance(genome_data, dict) + + first_genome_uuid = next(iter(genome_data.keys())) + first_genome_data = genome_data[first_genome_uuid] + assert 'genome' in first_genome_data + assert 'datasets' in first_genome_data + assert 'attributes' in first_genome_data + assert 'genebuild_metadata' in first_genome_data + assert isinstance(first_genome_data['datasets'], list) + assert isinstance(first_genome_data['attributes'], dict) + + def test_normalize_species_name(self, test_dbs): + """Test _normalize_species_name correctly normalizes species names.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + assert exporter._normalize_species_name('homo sapiens') == 'homo_sapiens' + assert exporter._normalize_species_name('species.name') == 'species_name' + assert exporter._normalize_species_name('species__name') == 'species_name' + assert exporter._normalize_species_name('species___name') == 'species_name' + assert exporter._normalize_species_name('homo. sapiens') == 'homo_sapiens' + assert exporter._normalize_species_name('homo sapiens') == 'homo_sapiens' + assert exporter._normalize_species_name('') == '' + assert exporter._normalize_species_name('homo_sapiens') == 'homo_sapiens' + assert exporter._normalize_species_name('Homo. 
Sapiens') == 'Homo_Sapiens' + assert exporter._normalize_species_name('homo sapiens') == 'homo_sapiens' + assert exporter._normalize_species_name(' homo sapiens ') == '_homo_sapiens_' + + def test_extract_provider_from_path(self, test_dbs): + """Test _extract_provider_from_path extracts provider correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + genebuild_metadata = { + 'genebuild_source_name': 'Ensembl' + } + assert exporter._extract_provider_from_path(genebuild_metadata) == 'ensembl' + genebuild_metadata = { + 'genebuild_source_name': 'REFSEQ' + } + assert exporter._extract_provider_from_path(genebuild_metadata) == 'refseq' + assert exporter._extract_provider_from_path(None) == 'unknown' + assert exporter._extract_provider_from_path({}) == 'unknown' + + def test_extract_genebuild_release_info(self, test_dbs): + """Test _extract_genebuild_release_info extracts release correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + genebuild_metadata = { + 'last_geneset_update': '2024-01-01' + } + result = exporter._extract_genebuild_release_info(genebuild_metadata) + assert result['release'] == '2024_01' + genebuild_metadata = { + 'last_geneset_update': '2023-12-15' + } + result = exporter._extract_genebuild_release_info(genebuild_metadata) + assert result['release'] == '2023_12' + result = exporter._extract_genebuild_release_info(None) + assert result['release'] == 'unknown' + result = exporter._extract_genebuild_release_info({}) + assert result['release'] == 'unknown' + genebuild_metadata = { + 'last_geneset_update': 'invalid-date' + } + result = exporter._extract_genebuild_release_info(genebuild_metadata) + assert result['release'] == 'unknown' + + def test_extract_release_info_from_ensembl_release(self, test_dbs): + """Test _extract_release_info_from_ensembl_release extracts release correctly.""" + metadata_uri = 
test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + + if genome: + result = exporter._extract_release_info_from_ensembl_release(genome) + + assert isinstance(result, dict) + assert 'release' in result + has_released = any( + gr.ensembl_release and gr.ensembl_release.status == ReleaseStatus.RELEASED + for gr in genome.genome_releases + ) + if has_released: + assert result['release'] != 'unknown' + + def test_has_released_dataset_bulk(self, test_dbs): + """Test _has_released_dataset_bulk correctly identifies dataset types.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + + datasets = [ + {'dataset_type_name': 'genebuild'}, + {'dataset_type_name': 'assembly'} + ] + assert exporter._has_released_dataset_bulk(datasets, 'genebuild') is True + assert exporter._has_released_dataset_bulk(datasets, 'assembly') is True + assert exporter._has_released_dataset_bulk(datasets, 'variation') is False + datasets = [ + {'dataset_type_name': 'regulatory_features'} + ] + assert exporter._has_released_dataset_bulk(datasets, 'regulation') is True + assert exporter._has_released_dataset_bulk([], 'genebuild') is False + + def test_get_dataset_file_paths_genebuild(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for genebuild.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/ensembl/geneset/2024_01" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + assembly_data = {'accession': 'GRCh38'} if genome else {} + file_paths = exporter._get_dataset_file_paths( + base_path, 'genebuild', genome, assembly_data + ) + + assert 'annotations' in file_paths + assert 'cdna.fa.gz' in file_paths['annotations'] + assert 'genes.gff3.gz' 
in file_paths['annotations'] + assert 'genes.gtf.gz' in file_paths['annotations'] + assert 'pep.fa.gz' in file_paths['annotations'] + assert 'vep' in file_paths + assert 'genes.gff3.bgz' in file_paths['vep'] + + def test_get_dataset_file_paths_assembly(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for assembly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/genome" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + assembly_data = {'accession': 'GRCh38'} if genome else {} + file_paths = exporter._get_dataset_file_paths( + base_path, 'assembly', genome, assembly_data + ) + assert 'genome_sequences' in file_paths + assert 'chromosomes.tsv.gz' in file_paths['genome_sequences'] + assert 'hardmasked.fa.gz' in file_paths['genome_sequences'] + assert 'softmasked.fa.gz' in file_paths['genome_sequences'] + assert 'unmasked.fa.gz' in file_paths['genome_sequences'] + assert 'vep' in file_paths + assert 'softmasked.fa.bgz' in file_paths['vep'] + + def test_get_dataset_file_paths_variation(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for variation.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/ensembl/variation/2024_01" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + assembly_data = {'accession': 'GRCh38'} if genome else {} + file_paths = exporter._get_dataset_file_paths( + base_path, 'variation', genome, assembly_data + ) + assert 'variation_data' in file_paths + assert 'variation.vcf.gz' in file_paths['variation_data'] + + def test_get_dataset_file_paths_regulation(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for regulation.""" + metadata_uri = 
test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/ensembl/regulation" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + assembly_data = {'accession': 'GRCh38'} if genome else {} + file_paths = exporter._get_dataset_file_paths( + base_path, 'regulation', genome, assembly_data + ) + + assert 'regulatory_features' in file_paths + assert 'regulation.gff' in file_paths['regulatory_features'] + + def test_get_dataset_file_paths_homologies(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for homologies.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/ensembl/homology/2024_01" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + if genome: + assembly_data = {'accession': genome.assembly.accession} + file_paths = exporter._get_dataset_file_paths( + base_path, 'homologies', genome, assembly_data + ) + assert 'homology_data' in file_paths + homology_files = file_paths['homology_data'] + assert len(homology_files) > 0 + first_file = next(iter(homology_files.keys())) + assert 'homology.tsv.gz' in first_file + + def test_export_json_with_actual_data(self, test_dbs): + """Test export generates valid JSON structure with actual database data.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + metadata = exporter.export_to_json() + assert metadata is not None + assert 'last_updated' in metadata + assert 'species' in metadata + + for species_name, species_data in metadata['species'].items(): + assert isinstance(species_name, str) + assert 'assemblies' in species_data + for assembly_name, assembly_data in species_data['assemblies'].items(): + assert isinstance(assembly_name, str) + if 'providers' in assembly_data: + for 
provider_name, provider_data in assembly_data['providers'].items(): + assert isinstance(provider_name, str) + if 'releases' in provider_data: + for release_name, release_data in provider_data['releases'].items(): + assert isinstance(release_name, str) + if 'datasets' in release_data: + assert isinstance(release_data['datasets'], dict) + + def test_export_handles_empty_database(self, test_dbs): + """Test export handles database with no released genomes gracefully.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + metadata = exporter.export_to_json() + assert metadata is not None + assert 'last_updated' in metadata + assert 'species' in metadata + assert isinstance(metadata['species'], dict) + + def test_json_file_is_valid_json(self, test_dbs, tmp_path): + """Test that exported JSON file can be read back and is valid.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + output_file = tmp_path / "test_output.json" + exporter.export_to_json(str(output_file)) + + with open(output_file, 'r') as f: + data = json.load(f) + assert 'last_updated' in data + assert 'species' in data + from datetime import datetime + try: + datetime.fromisoformat(data['last_updated']) + except ValueError: + pytest.fail("last_updated is not in valid ISO format") + + +@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / "databases/ensembl_genome_metadata"}, + {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, + ]], indirect=True) +class TestChangelogGenerator: + """Test suite for ChangelogGenerator class.""" + + def test_init_valid_parameters(self, test_dbs): + """Test ChangelogGenerator initialization with valid parameters.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01" + ) + assert generator.metadata_db is not None + assert 
generator.release_label == "2024-01" + assert generator.output_path is None + + def test_init_with_output_path(self, test_dbs): + """Test ChangelogGenerator initialization with custom output path.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = "/tmp/test_changelog.csv" + + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=output_path + ) + assert generator.metadata_db is not None + assert generator.release_label == "2024-01" + assert generator.output_path == output_path + + def test_init_invalid_metadata_uri_empty(self, test_dbs): + """Test initialization fails with empty metadata URI.""" + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri="", + release_label="2024-01" + ) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_metadata_uri_none(self, test_dbs): + """Test initialization fails with None metadata URI.""" + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=None, + release_label="2024-01" + ) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_metadata_uri_not_string(self, test_dbs): + """Test initialization fails with non-string metadata URI.""" + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=123, + release_label="2024-01" + ) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_release_label_empty(self, test_dbs): + """Test initialization fails with empty release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="" + ) + assert "release_label must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_release_label_none(self, test_dbs): + """Test initialization fails with None release 
label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=None + ) + assert "release_label must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_release_label_not_string(self, test_dbs): + """Test initialization fails with non-string release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=123 + ) + assert "release_label must be a non-empty string" in str(excinfo.value) + + def test_verify_release_exists(self, test_dbs): + """Test verify_release returns correct type for existing release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + release = session.query(EnsemblRelease).first() + if release: + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=release.label + ) + release_type = generator.verify_release() + assert release_type in ['partial', 'integrated'] + assert release_type == release.release_type + + def test_verify_release_not_found(self, test_dbs): + """Test verify_release raises error for non-existent release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="nonexistent-release-99999" + ) + with pytest.raises(ValueError) as excinfo: + generator.verify_release() + assert "Release not found" in str(excinfo.value) + assert "nonexistent-release-99999" in str(excinfo.value) + + def test_gather_partial_data_structure(self, test_dbs): + """Test gather_partial_data returns correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = 
session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + + if partial_release: + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label + ) + + data = generator.gather_partial_data() + + assert isinstance(data, list) + first_entry = data[0] + required_keys = [ + 'scientific_name', 'common_name', 'assembly_name', + 'assembly_accession', 'annotation_provider', + 'geneset_updated', 'variation_updated', 'regulation_updated' + ] + for key in required_keys: + assert key in first_entry, f"Missing key: {key}" + assert isinstance(first_entry['scientific_name'], str) + assert first_entry['common_name'] is None or isinstance(first_entry['common_name'], str) + assert isinstance(first_entry['assembly_name'], str) + assert isinstance(first_entry['assembly_accession'], str) + assert first_entry['annotation_provider'] is None or isinstance(first_entry['annotation_provider'], + str) + assert isinstance(first_entry['geneset_updated'], int) + assert isinstance(first_entry['variation_updated'], int) + assert isinstance(first_entry['regulation_updated'], int) + assert first_entry['geneset_updated'] in [0, 1] + assert first_entry['variation_updated'] in [0, 1] + assert first_entry['regulation_updated'] in [0, 1] + + def test_gather_integrated_data_structure(self, test_dbs): + """Test gather_integrated_data returns correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + integrated_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'integrated' + ).first() + if integrated_release: + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=integrated_release.label + ) + data = generator.gather_integrated_data() + assert isinstance(data, list) + if len(data) > 0: + first_entry = data[0] + required_keys = [ + 'scientific_name', 'common_name', 'assembly_name', 
+ 'assembly_accession', 'annotation_provider', + 'geneset_updated', 'variation_updated', 'regulation_updated', + 'status' + ] + for key in required_keys: + assert key in first_entry, f"Missing key: {key}" + assert first_entry['status'] in ['New', 'Removed', 'Updated', 'Unchanged'] + + def test_get_annotation_sources_bulk(self, test_dbs): + """Test _get_annotation_sources_bulk retrieves annotation sources.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + if partial_release: + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label + ) + from ensembl.production.metadata.api.models import Genome, GenomeDataset + genome_ids = [gr.genome_id for gr in session.query(GenomeDataset.genome_id).filter( + GenomeDataset.release_id == partial_release.release_id + ).distinct().limit(5).all()] + if genome_ids: + annotation_sources = generator._get_annotation_sources_bulk( + session, genome_ids + ) + assert isinstance(annotation_sources, dict) + for genome_id in annotation_sources.keys(): + assert isinstance(genome_id, int) + for source in annotation_sources.values(): + assert source is None or isinstance(source, str) + + def test_get_annotation_sources_bulk_empty_list(self, test_dbs): + """Test _get_annotation_sources_bulk handles empty genome list.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + if partial_release: + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label + ) + annotation_sources = generator._get_annotation_sources_bulk(session, []) + assert isinstance(annotation_sources, 
dict) + assert len(annotation_sources) == 0 + + def test_export_to_csv_partial_release(self, test_dbs, tmp_path): + """Test export_to_csv creates file with correct structure for partial release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "test_changelog.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=str(output_file) + ) + + sample_data = [ + { + 'scientific_name': 'homo sapiens', + 'common_name': 'human', + 'assembly_name': 'GRCh38', + 'assembly_accession': 'GCA_000001405.15', + 'annotation_provider': 'Ensembl', + 'geneset_updated': 1, + 'variation_updated': 0, + 'regulation_updated': 1 + } + ] + generator.export_to_csv(sample_data) + + assert output_file.exists() + with open(output_file, 'r') as f: + lines = f.readlines() + assert lines[0].startswith('# Changelog for release') + assert '2024-01' in lines[0] + reader = csv.DictReader(lines[1:]) + rows = list(reader) + assert len(rows) == 1 + assert rows[0]['scientific_name'] == 'homo sapiens' + assert rows[0]['geneset_updated'] == '1' + assert rows[0]['variation_updated'] == '0' + assert 'status' not in rows[0] # Partial releases don't have status + + def test_export_to_csv_integrated_release(self, test_dbs, tmp_path): + """Test export_to_csv creates file with correct structure for integrated release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "test_changelog_integrated.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="112", + output_path=str(output_file) + ) + sample_data = [ + { + 'scientific_name': 'homo sapiens', + 'common_name': 'human', + 'assembly_name': 'GRCh38', + 'assembly_accession': 'GCA_000001405.15', + 'annotation_provider': 'Ensembl', + 'geneset_updated': '2024-01', + 'variation_updated': None, + 'regulation_updated': '2024-01', + 'status': 'Updated' + } + ] + generator.export_to_csv(sample_data) + assert 
output_file.exists() + with open(output_file, 'r') as f: + lines = f.readlines() + assert lines[0].startswith('# Changelog for release') + reader = csv.DictReader(lines[1:]) + rows = list(reader) + assert len(rows) == 1 + assert rows[0]['scientific_name'] == 'homo sapiens' + assert rows[0]['status'] == 'Updated' # Integrated releases have status + + def test_export_to_csv_default_output_path(self, test_dbs, tmp_path, monkeypatch): + """Test export_to_csv uses default output path when none specified.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + monkeypatch.chdir(tmp_path) + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01" + ) + sample_data = [ + { + 'scientific_name': 'test species', + 'common_name': 'test', + 'assembly_name': 'test', + 'assembly_accession': 'test', + 'annotation_provider': 'test', + 'geneset_updated': 0, + 'variation_updated': 0, + 'regulation_updated': 0 + } + ] + generator.export_to_csv(sample_data) + default_file = tmp_path / "2024-01.csv" + assert default_file.exists() + + def test_export_to_csv_empty_data(self, test_dbs, tmp_path): + """Test export_to_csv handles empty data correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "test_empty.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=str(output_file) + ) + generator.export_to_csv([]) + assert output_file.exists() + with open(output_file, 'r') as f: + lines = f.readlines() + assert lines[0].startswith('# Changelog for release') + assert len(lines) >= 2 + + def test_export_to_csv_creates_parent_directory(self, test_dbs, tmp_path): + """Test export_to_csv creates parent directories if they don't exist.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "nested" / "directories" / "changelog.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + 
output_path=str(output_file) + ) + sample_data = [ + { + 'scientific_name': 'test', + 'common_name': 'test', + 'assembly_name': 'test', + 'assembly_accession': 'test', + 'annotation_provider': 'test', + 'geneset_updated': 0, + 'variation_updated': 0, + 'regulation_updated': 0 + } + ] + generator.export_to_csv(sample_data) + assert output_file.exists() + assert output_file.parent.exists() + + def test_generate_partial_release(self, test_dbs, tmp_path): + """Test generate method works end-to-end for partial release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + output_file = tmp_path / "test_generate.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label, + output_path=str(output_file) + ) + generator.generate() + assert output_file.exists() + + def test_generate_integrated_release(self, test_dbs, tmp_path): + """Test generate method works end-to-end for integrated release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + integrated_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'integrated' + ).first() + if integrated_release: + output_file = tmp_path / "test_generate_integrated.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=integrated_release.label, + output_path=str(output_file) + ) + generator.generate() + assert output_file.exists() + + def test_generate_invalid_release(self, test_dbs, tmp_path): + """Test generate method raises error for invalid release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "test_invalid.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + 
release_label="invalid-release-999", + output_path=str(output_file) + ) + with pytest.raises(ValueError) as excinfo: + generator.generate() + assert "Release not found" in str(excinfo.value) + + def test_gather_partial_data_no_genomes(self, test_dbs): + """Test gather_partial_data returns empty list when no genomes found.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label + ) + + data = generator.gather_partial_data() + assert isinstance(data, list) + + def test_csv_fieldnames_partial_vs_integrated(self, test_dbs, tmp_path): + """Test that CSV has different fieldnames for partial vs integrated releases.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + # Partial release data + partial_file = tmp_path / "partial.csv" + generator_partial = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=str(partial_file) + ) + + partial_data = [{ + 'scientific_name': 'test', 'common_name': 'test', 'assembly_name': 'test', + 'assembly_accession': 'test', 'annotation_provider': 'test', + 'geneset_updated': 0, 'variation_updated': 0, 'regulation_updated': 0 + }] + + generator_partial.export_to_csv(partial_data) + + integrated_file = tmp_path / "integrated.csv" + generator_integrated = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="112", + output_path=str(integrated_file) + ) + + integrated_data = [{ + 'scientific_name': 'test', 'common_name': 'test', 'assembly_name': 'test', + 'assembly_accession': 'test', 'annotation_provider': 'test', + 'geneset_updated': '2024-01', 'variation_updated': None, 'regulation_updated': None, + 'status': 'New' + }] + + generator_integrated.export_to_csv(integrated_data) + + with 
open(partial_file, 'r') as f: + lines = f.readlines() + header = lines[1].strip() # Skip comment line + assert 'status' not in header + + with open(integrated_file, 'r') as f: + lines = f.readlines() + header = lines[1].strip() + assert 'status' in header diff --git a/src/tests/test_organism_to_organismgroup.py b/src/tests/test_organism_to_organismgroup.py deleted file mode 100644 index 97eef0c0..00000000 --- a/src/tests/test_organism_to_organismgroup.py +++ /dev/null @@ -1,90 +0,0 @@ -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from collections import namedtuple -from pathlib import Path - -import pytest -from ensembl.utils.database import DBConnection - -from ensembl.production.metadata.api.models import OrganismGroup, Genome, Organism, OrganismGroupMember -from ensembl.production.metadata.scripts.organism_to_organismgroup import process_genomes - -db_directory = Path(__file__).parent / 'databases' -db_directory = db_directory.resolve() - -# Define a named tuple for script args -Args = namedtuple('Args', [ - 'metadata_db_uri', 'core_server_uri', 'organism_group_type', - 'organism_group_name', 'genome_uuid', 'release_id', 'remove', 'raise_error' -]) - - -@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / "databases/ensembl_genome_metadata"}, - {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, - {'src': Path(__file__).parent / "databases/core_1"}, - ]], - indirect=True) -class TestAddOrRemoveOrganismGroup: - dbc = None - - @pytest.mark.parametrize( - "genome_uuids, organism_group_type, organism_group_name, release_id, remove", - [ - ('a7335667-93e7-11ec-a39d-005056b38ce3', 'Test', 'EnsemblTest', '', False), - ('a7335667-93e7-11ec-a39d-005056b38ce3', 'Test', 'EnsemblTest', '', True), - - ] - ) - def test_add_organismgroup(self, test_dbs, genome_uuids, organism_group_type, organism_group_name, release_id, - remove): - metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) - args = Args( - metadata_db_uri=test_dbs['ensembl_genome_metadata'].dbc.url, - core_server_uri=None, - organism_group_type='Test', - organism_group_name='EnsemblTest', - genome_uuid=['a7335667-93e7-11ec-a39d-005056b38ce3'], - release_id=[], - remove=remove, - raise_error=False - ) - - # Mock the database connection - with metadata_db.session_scope() as session: - organism_group = session.query(OrganismGroup).filter( - OrganismGroup.name == args.organism_group_name, - OrganismGroup.type == args.organism_group_type - ).one_or_none() - - organism_group_id = 
organism_group.organism_group_id if organism_group else None - assert organism_group_id is not None - process_genomes(session, args, organism_group_id=organism_group_id) - session.commit() - # Check if the organism group was added - query = ( - session.query(Genome, Organism, OrganismGroup).join(Organism, Organism.organism_id == Genome.organism_id - ).join(OrganismGroupMember, - OrganismGroupMember.organism_id == Organism.organism_id - ).join(OrganismGroup, - OrganismGroup.organism_group_id == OrganismGroupMember.organism_group_id - ).filter( - Genome.genome_uuid.in_(args.genome_uuid), - OrganismGroup.name == args.organism_group_name, - ) - ) - if remove: - assert query.count() == 0, "Organism group member should be removed" - else: - assert query.count() > 0 - for genome, organism, organism_group in query.all(): - assert organism_group.name == args.organism_group_name, f"Expected {args.organism_group_name}, got {organism_group.name}" - assert organism_group.type == args.organism_group_type, f"Expected {args.organism_group_type}, got {organism_group.type}" diff --git a/src/tests/test_release_factory.py b/src/tests/test_release_factory.py index 305a0e6b..0eee9277 100644 --- a/src/tests/test_release_factory.py +++ b/src/tests/test_release_factory.py @@ -20,6 +20,7 @@ from ensembl.production.metadata.api.exceptions import MissingMetaException from ensembl.production.metadata.api.factories.genomes import GenomeFactory from ensembl.production.metadata.api.factories.release import ReleaseFactory +from ensembl.production.metadata.api.factories.utils import get_genome_sets_by_assembly_and_provider from ensembl.production.metadata.api.models import * logger = logging.getLogger(__name__) @@ -200,3 +201,21 @@ def test_pre_release_check_processed_alternative(self, test_dbs): factory = ReleaseFactory(test_dbs['ensembl_genome_metadata'].dbc.url) errors = factory.pre_release_check("4") assert not errors, f"Unexpected errors found: {errors}" + + 
+@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / "databases/ensembl_genome_metadata"}, + {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, + ]], indirect=True) +class TestFactoryUtils: + dbc: UnitTestDB = None + + def test_get_genome_sets_by_assembly_and_provider(self, test_dbs) -> None: + """ + Test `get_genome_sets_by_assembly_and_provider`. + Weak test: the metadata here has not been populated with an updated genome, so it just returns an empty dict. + """ + metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) + + with metadata_db.session_scope() as session: + genome_sets = get_genome_sets_by_assembly_and_provider(session) + assert genome_sets == {} diff --git a/src/tests/test_scripts.py b/src/tests/test_scripts.py new file mode 100644 index 00000000..99159177 --- /dev/null +++ b/src/tests/test_scripts.py @@ -0,0 +1,330 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from collections import namedtuple +from unittest.mock import patch + +import pytest + +from ensembl.production.metadata.api.models import Assembly +from ensembl.production.metadata.scripts.copy_handover_files import * +from ensembl.production.metadata.scripts.create_datasets_json import * +from ensembl.production.metadata.scripts.delete_ftp_by_uuid import * +from ensembl.production.metadata.scripts.load_meta_duckdb import * +from ensembl.production.metadata.scripts.organism_to_organismgroup import * + +db_directory = Path(__file__).parent / 'databases' +db_directory = db_directory.resolve() + +# Define a named tuple for script args +Args = namedtuple('Args', [ + 'metadata_db_uri', 'core_server_uri', 'organism_group_type', + 'organism_group_name', 'genome_uuid', 'release_id', 'remove', 'raise_error' +]) + +@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / "databases/ensembl_genome_metadata"}, + {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, + {'src': Path(__file__).parent / "databases/core_1"}, + ]], + indirect=True) +class TestScripts: + """Test suite for various metadata scripts.""" + + def test_check_directory_single_path_valid(self, test_dbs, tmp_path): + """Test check_directory function with a single valid directory (returned as a one-element list).""" + test_dir = tmp_path / "test_dir" + test_dir.mkdir() + result = check_directory(str(test_dir)) + assert result == [str(test_dir)] + + def test_check_directory_invalid(self, test_dbs): + """Test check_directory function with invalid directory.""" + with pytest.raises(argparse.ArgumentTypeError) as excinfo: + check_directory("/nonexistent/directory/path") + assert "does not exist" in str(excinfo.value) + + def test_generate_full_paths(self, test_dbs): + """Test generate_full_paths creates correct FTP and NFS paths.""" + relative_paths = ["species1/assembly1", "species2/assembly2"] + ftp_root = "/ftp/root/" + nfs_root = "/nfs/root/" + + result = generate_full_paths(relative_paths, ftp_root, 
nfs_root) + + assert len(result) == 4 # 2 relative paths * 2 roots + assert "/ftp/root/species1/assembly1" in result + assert "/nfs/root/species1/assembly1" in result + assert "/ftp/root/species2/assembly2" in result + assert "/nfs/root/species2/assembly2" in result + + def test_generate_full_paths_empty(self, test_dbs): + """Test generate_full_paths with empty input.""" + result = generate_full_paths([], "/ftp/", "/nfs/") + assert result == [] + + def test_submit_slurm_job_test_mode(self, test_dbs, capsys): + """Test submit_slurm_job in test mode (no actual submission).""" + paths = ["/path1", "/path2"] + submit_slurm_job(paths, test=True) + + captured = capsys.readouterr() + assert "[TEST MODE]" in captured.out + assert "/path1" in captured.out + assert "/path2" in captured.out + + def test_submit_slurm_job_empty_paths(self, test_dbs, capsys): + """Test submit_slurm_job with empty paths list.""" + submit_slurm_job([], test=False) + + captured = capsys.readouterr() + assert "No paths to delete" in captured.out + + @patch('subprocess.run') + def test_submit_slurm_job_actual_submission(self, mock_subprocess, test_dbs): + """Test submit_slurm_job makes correct subprocess call.""" + paths = ["/path1", "/path2"] + submit_slurm_job(paths, test=False) + + # Verify subprocess.run was called + mock_subprocess.assert_called_once() + call_args = mock_subprocess.call_args[0][0] + assert "sbatch" in call_args + assert "--wrap" in call_args + + def test_variation_tracks_json_parsing(self, test_dbs, tmp_path): + """Test variation_tracks function parses JSON correctly.""" + # Create test JSON file + test_data = { + "genome-uuid-1": { + "datafiles": { + "file1": str(tmp_path / "source1.vcf"), + "file2": str(tmp_path / "source2.vcf") + } + } + } + + # Create source files + (tmp_path / "source1.vcf").touch() + (tmp_path / "source2.vcf").touch() + + json_file = tmp_path / "test.json" + with open(json_file, 'w') as f: + json.dump(test_data, f) + + dest_dir = tmp_path / "destination" + 
dest_dir.mkdir() + + # Run the function + variation_tracks(str(json_file), "release_1", [str(dest_dir) + "/"]) + + # Verify files were copied + genome_dir = dest_dir / "genome-uuid-1" + assert genome_dir.exists() + assert (genome_dir / "source1.vcf").exists() + assert (genome_dir / "source2.vcf").exists() + + def test_variation_tracks_invalid_json(self, test_dbs, tmp_path): + """Test variation_tracks handles invalid JSON gracefully.""" + json_file = tmp_path / "invalid.json" + with open(json_file, 'w') as f: + f.write("not valid json{") + + with pytest.raises(Exception): + variation_tracks(str(json_file), "release_1", ["/tmp/"]) + + def test_regulation_copy_creates_directory(self, test_dbs, tmp_path): + """Test regulation_copy creates destination directories.""" + source_file = tmp_path / "source.bb" + source_file.touch() + + test_data = [ + { + "genome_uuid": "test-genome-uuid", + "dataset_source": {"name": str(source_file), "type": "bigbed"}, + "dataset_type": "regulation", + "dataset_attribute": [], + "name": "test_regulation", + "label": "test_label", + "version": "1.0" + } + ] + + json_file = tmp_path / "regulation.json" + with open(json_file, 'w') as f: + json.dump(test_data, f) + dest_base = tmp_path / "destination" + dest_base.mkdir() + regulation_copy(str(json_file), "release_1", [str(dest_base) + "/"]) + expected_dir = dest_base / "test-genome-uuid" + assert expected_dir.exists() + expected_file = expected_dir / f"regulatory-features{source_file.suffix}" + assert expected_file.exists() + + def test_fetch_division_name(self, test_dbs): + """Test fetch_division_name retrieves division from core database.""" + core_uri = test_dbs.get('core_1') + if core_uri: + with DBConnection(core_uri.dbc.url).session_scope() as session: + division = session.query(Meta).filter( + Meta.meta_key == 'species.division' + ).first() + result = fetch_division_name(core_uri.dbc.url) + if division: + assert result == division.meta_value + else: + assert result is None + + def 
test_create_organism_group_member(self, test_dbs): + """Test create_or_remove_organism_group creates new member.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with DBConnection(metadata_uri).session_scope() as session: + organism = session.query(Organism).first() + from ensembl.production.metadata.api.models import OrganismGroup + org_group = session.query(OrganismGroup).first() + if organism and org_group: + existing = session.query(OrganismGroupMember).filter( + OrganismGroupMember.organism_id == organism.organism_id, + OrganismGroupMember.organism_group_id == org_group.organism_group_id + ).first() + if not existing: + msg = create_or_remove_organism_group( + session, organism.organism_id, org_group.organism_group_id, remove=False + ) + assert "created successfully" in msg or "already exists" in msg + member = session.query(OrganismGroupMember).filter( + OrganismGroupMember.organism_id == organism.organism_id, + OrganismGroupMember.organism_group_id == org_group.organism_group_id + ).first() + assert member is not None + + def test_remove_organism_group_member(self, test_dbs): + """Test create_or_remove_organism_group removes member.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with DBConnection(metadata_uri).session_scope() as session: + member = session.query(OrganismGroupMember).first() + if member: + organism_id = member.organism_id + group_id = member.organism_group_id + msg = create_or_remove_organism_group( + session, organism_id, group_id, remove=True + ) + assert "removed successfully" in msg or "not found" in msg + + def test_json_file_structure_for_ftp_copy(self, test_dbs, tmp_path): + """Test that ftp_copy can parse expected JSON structure.""" + test_data = [ + { + "genome_uuid": "test-uuid", + "dataset_source": { + "name": str(tmp_path / "test.file"), + "type": "vep" + }, + "dataset_type": "vep", + "name": "test_vep", + "label": "test_label", + "version": "1.0" + } + ] + json_file = tmp_path / 
"test_ftp.json" + with open(json_file, 'w') as f: + json.dump(test_data, f) + with open(json_file, 'r') as f: + loaded_data = json.load(f) + assert len(loaded_data) == 1 + assert loaded_data[0]['genome_uuid'] == "test-uuid" + assert loaded_data[0]['dataset_type'] == "vep" + + def test_duckdb_script_environment_variable(self, test_dbs, monkeypatch): + """Test that DuckDB script reads from environment variable.""" + test_uri = "mysql://testuser:testpass@testhost:3306/testdb" + monkeypatch.setenv('METADATA_DB', test_uri) + from urllib.parse import urlparse + db = urlparse(os.environ.get('METADATA_DB')) + assert db.hostname == "testhost" + assert db.port == 3306 + assert db.username == "testuser" + assert db.path[1:] == "testdb" + + def test_ftp_metadata_paths_structure(self, test_dbs): + """Test that genome public path structure is correct for FTP metadata.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with DBConnection(metadata_uri).session_scope() as session: + genome = session.query(Genome).first() + if genome and hasattr(genome, 'get_public_path'): + paths = genome.get_public_path(dataset_type='genebuild') + assert isinstance(paths, list) + if len(paths) > 0: + first_path = paths[0] + assert 'dataset_type' in first_path or 'path' in first_path + + def test_genome_public_path_all_types(self, test_dbs): + """Test genome.get_public_path with 'all' dataset type.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with DBConnection(metadata_uri).session_scope() as session: + genome = session.query(Genome).first() + if genome and hasattr(genome, 'get_public_path'): + paths = genome.get_public_path(dataset_type='all') + assert isinstance(paths, list) + if len(paths) > 1: + dataset_types = {p.get('dataset_type') for p in paths if 'dataset_type' in p} + assert len(dataset_types) > 1 + + def test_ftp_delete_checks_shared_organism(self, test_dbs): + """Test that FTP delete logic checks for shared organisms.""" + metadata_uri = 
test_dbs['ensembl_genome_metadata'].dbc.url + + with DBConnection(metadata_uri).session_scope() as session: + genome = session.query(Genome).first() + if genome: + other_genomes_count = session.query(Genome).filter( + Genome.organism_id == genome.organism_id, + Genome.genome_uuid != genome.genome_uuid + ).count() + assert isinstance(other_genomes_count, int) + assert other_genomes_count >= 0 + + def test_ftp_delete_checks_shared_assembly(self, test_dbs): + """Test that FTP delete logic checks for shared assemblies.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with DBConnection(metadata_uri).session_scope() as session: + genome = session.query(Genome).first() + if genome: + other_assemblies_count = session.query(Genome).filter( + Genome.assembly_id == genome.assembly_id, + Genome.genome_uuid != genome.genome_uuid + ).count() + assert isinstance(other_assemblies_count, int) + assert other_assemblies_count >= 0 + + def test_organism_scientific_name_formatting(self, test_dbs): + """Test that organism scientific names are formatted correctly for paths.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with DBConnection(metadata_uri).session_scope() as session: + organism = session.query(Organism).first() + if organism: + scientific_name = organism.scientific_name + formatted_name = scientific_name.replace(" ", "_") + assert " " not in formatted_name + assert "_" in formatted_name or len(scientific_name.split()) == 1 + + def test_assembly_accession_in_paths(self, test_dbs): + """Test that assembly accessions are available for path construction.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with DBConnection(metadata_uri).session_scope() as session: + assembly = session.query(Assembly).first() + if assembly: + assert assembly.accession is not None + assert len(assembly.accession) > 0 + assert " " not in assembly.accession diff --git a/src/tests/test_updater.py b/src/tests/test_updater.py index 
28d50fb8..c702be88 100644 --- a/src/tests/test_updater.py +++ b/src/tests/test_updater.py @@ -64,7 +64,6 @@ def test_new_organism(self, test_dbs): organism = session.query(Organism).where(Organism.biosample_id == 'Jabberwocky').first() assembly = session.query(Assembly).where(Assembly.name == 'jaber01').first() assert organism.scientific_name == 'carol_jabberwocky' - assert organism.genomes[0].genebuild_version == 'ENS01' assert organism.genomes[0].genebuild_date == '2023-01' # Test the Assembly assert assembly.accession == 'GCF_1111111123.3' @@ -78,17 +77,17 @@ def test_new_organism(self, test_dbs): assert dataset.dataset_type.name == "genebuild" # Testing assembly sequence is circular sequence = session.query(AssemblySequence).where( - (AssemblySequence.is_circular == 1) & (AssemblySequence.name == 'TEST1_seqA') + (AssemblySequence.is_circular == 1) & (AssemblySequence.name == 'AA123456.1') ).first() assert sequence is not None assert sequence.type == "primary_assembly" # Testing assembly_sequence.type sequence2 = session.query(AssemblySequence).where( - (AssemblySequence.is_circular == 0) & (AssemblySequence.name == 'TEST2_seqB') + (AssemblySequence.is_circular == 0) & (AssemblySequence.name == 'AA123456.2') ).first() assert sequence2 is not None assert sequence.type == "primary_assembly" sequence3 = session.query(AssemblySequence).where( - (AssemblySequence.is_circular == 0) & (AssemblySequence.name == 'TEST3_seqC') + (AssemblySequence.is_circular == 0) & (AssemblySequence.name == 'AA123456.3') ).first() assert sequence3 is not None count = session.query(Dataset).join(DatasetSource).join(DatasetType) \ @@ -133,7 +132,6 @@ def test_update_assembly(self, test_dbs): organism = genome.organism assert organism.scientific_name == 'carol_jabberwocky' assert genome.assembly.accession == 'weird02' - assert genome.genebuild_version == 'ENS01' assert genome.genebuild_date == '2024-02' # def test_update_geneset(self, test_dbs): @@ -161,21 +159,16 @@ def 
test_update_geneset(self, test_dbs): ).one() # Get the genebuild dataset for THIS genome - genebuild_genome_dataset = session.query(GenomeDataset).filter( - GenomeDataset.genome_id == genome.genome_id - ).join(Dataset).filter( - Dataset.name == 'genebuild', - Dataset.version == 'ENS02' - ).first() + genebuild_dataset = session.query(Dataset).join(GenomeDataset).join(Genome).filter( + Genome.genome_uuid == inserted_genome_uuid, + Dataset.name == "genebuild" + ).one() # .one() raises unless exactly one row matches - assert genebuild_genome_dataset is not None - dataset = genebuild_genome_dataset.dataset + assert genebuild_dataset is not None - assert dataset is not None - assert re.match(".*_core_4", dataset.dataset_source.name) - assert dataset.dataset_source.type == "core" - assert dataset.dataset_type.name == "genebuild" - assert genome.genebuild_version == 'ENS02' + assert re.match(".*_core_4", genebuild_dataset.dataset_source.name) + assert genebuild_dataset.dataset_source.type == "core" + assert genebuild_dataset.dataset_type.name == "genebuild" assert genome.genebuild_date == '2023-01' # From core_4 meta table assert len(genome.genome_releases) > 0 diff --git a/src/tests/tests_exports.py b/src/tests/tests_exports.py deleted file mode 100644 index 536aec6f..00000000 --- a/src/tests/tests_exports.py +++ /dev/null @@ -1,20 +0,0 @@ -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -Unit tests for utils.py -""" -import logging - -logger = logging.getLogger(__name__) - -# TODO create tests for the stats generator and the changlog generator. Wait for the new schema as this will be -# useless to do now. From b7c2cb44754ed7dae96900f1a0d13fc5c466cdec Mon Sep 17 00:00:00 2001 From: danielp Date: Fri, 24 Oct 2025 14:55:00 +0100 Subject: [PATCH 08/14] fixed single failing test. --- .../metadata/api/adaptors/genome.py | 5 +- src/scripts/update_test_set.py | 152 ------------------ src/scripts/updates_metadata.sql | 130 --------------- src/tests/test_scripts.py | 3 +- 4 files changed, 3 insertions(+), 287 deletions(-) delete mode 100644 src/scripts/update_test_set.py delete mode 100644 src/scripts/updates_metadata.sql diff --git a/src/ensembl/production/metadata/api/adaptors/genome.py b/src/ensembl/production/metadata/api/adaptors/genome.py index 9f305967..a3fb9eaf 100644 --- a/src/ensembl/production/metadata/api/adaptors/genome.py +++ b/src/ensembl/production/metadata/api/adaptors/genome.py @@ -989,8 +989,7 @@ def fetch_genome_group_members_detailed(self, genome_group_id=None, group_name=N session.expire_on_commit = False return session.execute(member_select).all() - - def get_public_path(self, genome_uuid, dataset_type='all', release=None): + def get_public_path(self, genome_uuid, dataset_type='all'): paths = [] scientific_name = None accession = None @@ -1060,7 +1059,7 @@ def get_public_path(self, genome_uuid, dataset_type='all', release=None): } # Check for invalid dataset type early - if dataset_type not in unique_dataset_types and dataset_type != 'all': + if dataset_type not in unique_dataset_types and dataset_type != 'all': raise TypeNotFoundException(f"Dataset Type : {dataset_type} not found in metadata.") # If 'all', add paths for all unique dataset types diff --git a/src/scripts/update_test_set.py b/src/scripts/update_test_set.py deleted file mode 100644 index 386b4eff..00000000 --- a/src/scripts/update_test_set.py +++ /dev/null @@ 
-1,152 +0,0 @@ -#!/usr/bin/env python -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import logging -import os -import random -from datetime import datetime, timedelta -from typing import List - -from ensembl.utils.database import DBConnection -from sqlalchemy.engine import make_url - -from ensembl.production.metadata.api.factories.datasets import DatasetFactory -from ensembl.production.metadata.api.factories.genomes import GenomeFactory -from ensembl.production.metadata.api.factories.release import ReleaseFactory -from ensembl.production.metadata.api.models import * - -logger = logging.getLogger(__name__) - -gen_factory = GenomeFactory() - - -class MetadataUpdater: - - def __init__(self, metadata_uri): - super().__init__() - self.metadata_uri = metadata_uri - - def check(self): - metadata_db = DBConnection(self.metadata_uri) - with metadata_db.session_scope() as session: - releases = session.query(EnsemblRelease).order_by(EnsemblRelease.version).all() - factory = ReleaseFactory(self.metadata_uri) - [factory.check_release(rel) for rel in releases] - - def wipe(self): - metadata_db = DBConnection(self.metadata_uri) - with metadata_db.session_scope() as session: - dataset_types = session.query(DatasetType.dataset_type_id).filter( - DatasetType.topic.in_(['production_process', 'production_preparation', 'production_publication'])) - delete = 
session.query(Dataset).filter(Dataset.dataset_type_id.in_(dataset_types)).delete() - session.execute(delete) - - def create_release_ds(self): - metadata_db = DBConnection(self.metadata_uri) - with metadata_db.session_scope() as session: - ds_factory = DatasetFactory(self.metadata_uri) - releases: List[EnsemblRelease] = session.query(EnsemblRelease).all() - for release in releases: - if release.status == ReleaseStatus.RELEASED: - dataset_status = DatasetStatus.RELEASED - topic = None - elif release.status == ReleaseStatus.PREPARING: - dataset_status = DatasetStatus.PROCESSED - topic = ['production_process'] - elif release.status == ReleaseStatus.PREPARED: - dataset_status = DatasetStatus.PROCESSED - topic = ['production_process', 'production_preparation'] - for genome_dataset in release.genome_datasets: - if topic is not None: - for top in topic: - ds_factory.create_all_child_datasets(dataset_uuid=genome_dataset.dataset.dataset_uuid, - topic=top, - session=session, - status=dataset_status, - release=release) - else: - ds_factory.create_all_child_datasets(dataset_uuid=genome_dataset.dataset.dataset_uuid, - topic=None, - session=session, - status=dataset_status, - release=release) - # Randomly assign dates for production datasets expected attributes - datasets = session.query(Dataset, EnsemblRelease).select_from(Dataset).join( - DatasetType.datasets).join(GenomeDataset, GenomeDataset.dataset_id == Dataset.dataset_id).outerjoin( - EnsemblRelease, EnsemblRelease.release_id == GenomeDataset.release_id).filter( - DatasetType.topic.in_(('production_process', 'production_preparation'))).order_by(Dataset.dataset_uuid) - # attribute_id IN(183, 182) - for dataset in datasets.all(): - end = None - start = None - if dataset.Dataset.status == DatasetStatus.RELEASED and dataset.EnsemblRelease.release_date is not None: - logger.info(f"Dataset {dataset.Dataset.dataset_uuid} is released") - start = dataset.EnsemblRelease.release_date - timedelta(weeks=3) - end = 
dataset.EnsemblRelease.release_date - elif dataset.Dataset.status == DatasetStatus.PROCESSED: - logger.info(f"Dataset {dataset.Dataset.dataset_uuid} is processed") - start = datetime.now() - timedelta(weeks=1) - end = datetime.now() - timedelta(days=1) - elif dataset.Dataset.status == DatasetStatus.PROCESSING: - logger.info(f"Dataset {dataset.Dataset.dataset_uuid} is processing") - start = datetime.now() - timedelta(weeks=1) - end = None - if end: - start_build = start + (end - start) * random.random() - end_build = start_build + timedelta(days=1) - - session.add(DatasetAttribute(dataset_id=dataset.Dataset.dataset_id, - attribute_id=183, - value=datetime.strftime(end_build, "%y/%m/%d"))) - if start: - if not end: - end = datetime.now() - start_build = start + (end - start) * random.random() - session.add(DatasetAttribute(dataset_id=dataset.Dataset.dataset_id, - attribute_id=182, - value=datetime.strftime(start_build, "%y/%m/%d"))) - - def create_submitted_ds(self): - metadata_db = DBConnection(self.metadata_uri) - with metadata_db.session_scope() as session: - ds_factory = DatasetFactory(self.metadata_uri) - datasets = session.query(Dataset).join(GenomeDataset.dataset).filter(GenomeDataset.release_id == None).all() - for dataset in datasets: - ds_factory.create_all_child_datasets(dataset_uuid=dataset.dataset_uuid, - topic='production_process', - session=session, - status=dataset.status) - - -def main(): - parser = argparse.ArgumentParser( - prog='update_test_set.py', - description='Some potential useful methods to update the test set on host' - ) - parser.add_argument('-m', '--metadata_db_uri', type=str, - default="mysql://ensembl@localhost:3306/marco_ensembl_genome_metadata", - required=False, help='Target metadata uri') - - parser.add_argument('--action', type=str, help="Action method to call (check|wipe|create)", - required=False, default='create_submitted_ds') - args = parser.parse_args() - meta_details = make_url(args.metadata_db_uri) - 
logger.info(f'Connecting Metadata Database with host:{meta_details.host} & dbname:{meta_details.database}') - meta_updater = MetadataUpdater(args.metadata_db_uri) - getattr(meta_updater, args.action)() - - -if __name__ == "__main__": - logger.info('Updating metadata content') - main() diff --git a/src/scripts/updates_metadata.sql b/src/scripts/updates_metadata.sql deleted file mode 100644 index ddd7ef13..00000000 --- a/src/scripts/updates_metadata.sql +++ /dev/null @@ -1,130 +0,0 @@ -#### DB updates to reset datasets -# DELETE non root datasets -delete dataset -from dataset where dataset_type_id > 7; -# Reinsert new dataset_types - -select * from dataset where dataset_type_id > 7; - -delete from dataset_type where dataset_type_id > 7; -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (8, 'genebuild_compute', 'External References', 'production_process', 'Xref genome annotation for Genebuild', null, 2, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (9, 'genebuild_files', 'Files dumps', 'production_process', 'File Dumps, either internal or for public consumption', null, 2, '8', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (11, 'genebuild_web', 'Web Geneset content', 'production_process', 'Web Geneset related content', null, 2, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (12, 'genebuild_prep', 'Genebuild preparation', 'production_preparation', 'Web Content for Geneset publication', null, 2, '8,9,11,12', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (13, 'xrefs', 'External References', 'production_process', 
'External annotations linking', null, 8, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (14, 'protein_features', 'Protein Features annotations', 'production_process', 'Proteins annotation', null, 8, '13', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (15, 'alpha_fold', 'AlphaFold computation', 'production_process', 'Compute Protein structure with Alphafold', null, 8, '13', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (16, 'blast', 'Blast tools', 'production_process', 'Blast Indexes files', null, 9, '8', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (17, 'ftp_dumps', 'Public FTP files', 'production_process', 'Public FTP flat files geneset dumps', null, 9, '8', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (18, 'thoas_dumps', 'Thoas load flat files', 'production_process', 'Dump flat file to load onto THOAS', null, 11, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (19, 'thoas_load', 'Thoas MongoDB Load', 'production_preparation', 'Load dumped files onto THOAS', null, 12, '18,23', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (20, 'genebuild_browser_files', 'Genome Browser BB Geneset files', 'production_process', 'Production BigBed for Genome Browser', null, 11, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (21, 
'genebuild_track', 'Geneset Tracks API', 'production_preparation', 'Register Geneset Track API BigBed files', null, 12, '20', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (23, 'checksums', 'Sequences Checksums', 'production_process', 'Compute core sequence checksums and update metadata', null, 11, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (24, 'refget_load', 'Refget Loading', 'production_preparation', 'Load sequences and their checksum onto Refget app', null, 12, '22', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (25, 'homology_compute', 'Homology annotation', 'production_process', 'Compute Genome homology analysis', null, 6, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (26, 'homology_load', 'Homology dataload', 'production_preparation', 'Load homology data onto Compara Service (MongoDB)', null, 6, '25', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (27, 'homology_ftp', 'Homology tsv public files', 'production_preparation', 'Dump and sync public TSV homology files', null, 6, '25', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (28, 'vep', 'VEP filesets', 'variation_annotation', 'VCF annotation file for geneset', null, null, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (29, 'variation_ftp', 'Public Variation files (vcf)', 'production_preparation', 'VCF files for public FTP', null, 3, null, null); -INSERT INTO 
dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (31, 'variation_browser_files', 'Variation Browser files', 'production_process', 'Variation track browser file', null, 3, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (32, 'variation_track', 'Variation Track', 'production_preparation', 'Variation Track API', null, 3, '31', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (33, 'regulation_browser_files', 'Regulation Browser files', 'production_process', 'Regulation track browser file', null, 7, null, null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (34, 'regulation_track', 'Regulation Track', 'production_preparation', 'Regulation Track API', null, 7, '33', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (35, 'regulation_ftp', 'Regulation Public files', 'production_preparation', 'Regulation public files', null, 7, '33', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (37, 'web_genesearch', 'GeneSearch Index', 'production_publication', 'Gene search indexes provisioning', null, null, '36', null); -INSERT INTO dataset_type (dataset_type_id, name, label, topic, description, details_uri, parent_id, depends_on, filter_on) VALUES (38, 'web_genomediscovery', 'Genome Search indexes loading to EBI search', 'production_publication', 'Load dumped data from genebuild_web onto EBI Search engine (SpecieSelector)', null, null, '37', null); - -# DELETE Bombus_terristris unlinked dataset -delete -from dataset -where dataset_uuid = '428d2741-2699-48a4-8830-4f808994f512'; 
- -# RELEASE datasets and genomes for 110.2 -update dataset - join genome_dataset using (dataset_id) -set status = 'Processed' # Change to 'Released' when releasing -where release_id = (select release_id from ensembl_release where version = 110.2); - -# RESET dataset which are not attached to a release -# Mark all Unreleased assembly dataset as 'Processed' -update dataset - join genome_dataset using (dataset_id) -set status = 'Processed' -where release_id is null - and dataset_type_id = (select dataset_type_id - from dataset_type - where dataset.name = 'assembly' - and dataset_type.dataset_type_id = dataset.dataset_type_id); - -# Mark all others as 'Submitted' -update dataset - join genome_dataset using (dataset_id) -set status = 'Submitted' -where release_id is null - and dataset_type_id = (select dataset_type_id - from dataset_type - where dataset.name <> 'assembly' - and dataset_type.dataset_type_id = dataset.dataset_type_id); - -# INSERT compara_homologies from 241 to 110.2 -BEGIN; -INSERT INTO dataset (dataset_uuid, name, version, created, label, dataset_source_id, dataset_type_id, status) -SELECT UUID(), - name, - '2.0', - NOW(), - dataset.label, - dataset.dataset_source_id, - 6, - 'Released' -from dataset - join genome_dataset using (dataset_id) -where dataset_type_id = 6 - and genome_dataset.release_id = 1; - -# INSERT compara_homologies as supplementary dataset for the 241 in next release. 
-INSERT INTO genome_dataset (is_current, dataset_id, genome_id, release_id) -select 0, - dataset_id, - (select genome.genome_id - from genome - join genome_dataset gd using (genome_id) - join dataset d1 using (dataset_id) - where gd.genome_id in (SELECT genome.genome_id - from genome - join genome_dataset using (genome_id) - join dataset using (dataset_id) - where dataset_type_id = 6 - and genome_dataset.release_id = 1) - and d1.dataset_source_id = d.dataset_source_id) as genebuild_genome_id, - 2 -from dataset d -where version = '2.0'; - -# UPDATE homologies for 110.1 is_current to 0 -UPDATE genome_dataset - join dataset using (dataset_id) -set is_current = 0 -where dataset_type_id = 6 - and release_id = 1; - -# UPDATE homologies for 110.2 is_current to 1 -UPDATE genome_dataset - join dataset using (dataset_id) -set is_current = 1 -where dataset_type_id = 6 - and release_id = 2; - -# UPDATE 110.2 as released -update ensembl_release -set is_current = 0 -where version = 110.1; -update ensembl_release -set status = 'Released', - is_current = 1 -where version = 110.2; -COMMIT; - - - diff --git a/src/tests/test_scripts.py b/src/tests/test_scripts.py index 99159177..64b63ac4 100644 --- a/src/tests/test_scripts.py +++ b/src/tests/test_scripts.py @@ -18,7 +18,6 @@ from ensembl.production.metadata.scripts.copy_handover_files import * from ensembl.production.metadata.scripts.create_datasets_json import * from ensembl.production.metadata.scripts.delete_ftp_by_uuid import * -from ensembl.production.metadata.scripts.load_meta_duckdb import * from ensembl.production.metadata.scripts.organism_to_organismgroup import * db_directory = Path(__file__).parent / 'databases' @@ -43,7 +42,7 @@ def test_check_directory_single_path_valid(self, test_dbs, tmp_path): test_dir = tmp_path / "test_dir" test_dir.mkdir() result = check_directory(str(test_dir)) - assert result == [str(test_dir)] + assert result == str(test_dir) def test_check_directory_invalid(self, test_dbs): """Test 
check_directory function with invalid directory.""" From 3948ca607f5eb2552360a20ae4bd6aea6ab6f386 Mon Sep 17 00:00:00 2001 From: danielp Date: Fri, 24 Oct 2025 14:56:00 +0100 Subject: [PATCH 09/14] Removed accidental space --- src/ensembl/production/metadata/api/adaptors/genome.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ensembl/production/metadata/api/adaptors/genome.py b/src/ensembl/production/metadata/api/adaptors/genome.py index a3fb9eaf..d4115510 100644 --- a/src/ensembl/production/metadata/api/adaptors/genome.py +++ b/src/ensembl/production/metadata/api/adaptors/genome.py @@ -1059,7 +1059,7 @@ def get_public_path(self, genome_uuid, dataset_type='all'): } # Check for invalid dataset type early - if dataset_type not in unique_dataset_type s and dataset_type != 'all': + if dataset_type not in unique_dataset_types and dataset_type != 'all': raise TypeNotFoundException(f"Dataset Type : {dataset_type} not found in metadata.") # If 'all', add paths for all unique dataset types From 3bc24c54be0ade62d24ca9e26a10a842e229e22d Mon Sep 17 00:00:00 2001 From: danielp Date: Wed, 29 Oct 2025 11:43:24 +0000 Subject: [PATCH 10/14] tol_id moved from assembly to organism --- .../metadata/api/models/assembly.py | 1 - .../metadata/api/models/organism.py | 1 + .../metadata/grpc/protobuf_msg_factory.py | 1 - .../production/metadata/updater/core.py | 36 ++++++++++++------ .../databases/ensembl_genome_metadata.db | Bin 262144 -> 262144 bytes 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/ensembl/production/metadata/api/models/assembly.py b/src/ensembl/production/metadata/api/models/assembly.py index c71620f5..aaac492a 100644 --- a/src/ensembl/production/metadata/api/models/assembly.py +++ b/src/ensembl/production/metadata/api/models/assembly.py @@ -31,7 +31,6 @@ class Assembly(LoadAble, Base): name = Column(String(128), nullable=False) accession_body = Column(String(32)) assembly_default = Column(String(128)) - tol_id = 
Column(String(32)) created = Column(DateTime) ensembl_name = Column(String(255), unique=True) is_reference = Column(TINYINT(1), nullable=False, default=0) diff --git a/src/ensembl/production/metadata/api/models/organism.py b/src/ensembl/production/metadata/api/models/organism.py index 5a06666f..99156d66 100644 --- a/src/ensembl/production/metadata/api/models/organism.py +++ b/src/ensembl/production/metadata/api/models/organism.py @@ -34,6 +34,7 @@ class Organism(LoadAble, Base): scientific_parlance_name = Column(String(255)) rank = Column(Integer, default=0) strain_type = Column(String(128), nullable=True, unique=False) + tol_id = Column(String(32)) # One to many relationships # Organism_id to organism_group_member and genome genomes = relationship("Genome", back_populates="organism", cascade="all, delete, delete-orphan") diff --git a/src/ensembl/production/metadata/grpc/protobuf_msg_factory.py b/src/ensembl/production/metadata/grpc/protobuf_msg_factory.py index 1fa96c05..13d03703 100644 --- a/src/ensembl/production/metadata/grpc/protobuf_msg_factory.py +++ b/src/ensembl/production/metadata/grpc/protobuf_msg_factory.py @@ -116,7 +116,6 @@ def create_assembly(data=None): ucsc_name=data.Assembly.ucsc_name, ensembl_name=data.Assembly.ensembl_name, is_reference=data.Assembly.is_reference, - tol_id=data.Assembly.tol_id, ) return assembly diff --git a/src/ensembl/production/metadata/updater/core.py b/src/ensembl/production/metadata/updater/core.py index 0c20ddd2..e612243d 100644 --- a/src/ensembl/production/metadata/updater/core.py +++ b/src/ensembl/production/metadata/updater/core.py @@ -439,6 +439,8 @@ def get_or_new_organism(self, species_id, meta_session): biosample_id = self.get_meta_single_meta_key(species_id, "organism.biosample_id") if biosample_id is None: biosample_id = self.get_meta_single_meta_key(species_id, "organism.production_name") + tol_id = self.get_meta_single_meta_key(species_id, "assembly.tol_id") # This one should be deleted eventually. 
+ tol_id = self.get_meta_single_meta_key(species_id, "organism.tol_id") # Getting the common name from the meta table, otherwise we grab it from ncbi. common_name = self.get_meta_single_meta_key(species_id, "organism.common_name") @@ -468,7 +470,8 @@ def get_or_new_organism(self, species_id, meta_session): biosample_id=biosample_id, strain=self.get_meta_single_meta_key(species_id, "organism.strain"), strain_type=self.get_meta_single_meta_key(species_id, "organism.type"), - scientific_parlance_name=self.get_meta_single_meta_key(species_id, "organism.scientific_parlance_name") + scientific_parlance_name=self.get_meta_single_meta_key(species_id, "organism.scientific_parlance_name"), + tol_id=tol_id ) # Query the metadata database to find if an Organism with the same Ensembl name already exists. @@ -812,7 +815,6 @@ def _create_new_assembly(self, species_id, meta_session, dataset_source, assembl with self.db.session_scope() as session: level = (session.execute(db.select(CoordSystem.name).filter( CoordSystem.species_id == species_id).order_by(CoordSystem.rank)).all())[0][0] - tol_id = self.get_meta_single_meta_key(species_id, "assembly.tol_id") accession_body = self.get_meta_single_meta_key(species_id, "assembly.accession_body") if self.get_meta_single_meta_key( species_id, "assembly.accession_body") else "INSDC" @@ -824,7 +826,6 @@ def _create_new_assembly(self, species_id, meta_session, dataset_source, assembl name=self.get_meta_single_meta_key(species_id, "assembly.name"), accession_body=accession_body, assembly_default=self.get_meta_single_meta_key(species_id, "assembly.default"), - tol_id=tol_id, created=func.now(), assembly_uuid=str(uuid.uuid4()), is_reference=is_reference @@ -859,7 +860,7 @@ def _create_genebuild(self, species_id, meta_session, source=None): assembly_accession = self.get_meta_single_meta_key(species_id, "assembly.accession") provider_name = self.get_meta_single_meta_key(species_id, "genebuild.provider_name") last_geneset_update = 
self.get_meta_single_meta_key(species_id, "genebuild.last_geneset_update") - + annotation_source = self.get_meta_single_meta_key(species_id, "genebuild.annotation_source") # Query for an existing combination - this is our uniqueness check # If this exists, we should NOT create a new one existing_combination = ( @@ -880,8 +881,25 @@ def _create_genebuild(self, species_id, meta_session, source=None): "Cannot create duplicate genebuild." ) - # Create a label for the dataset - this is just for human readability - # Old labels stay untouched; new ones use a descriptive format + # Check for conflicting annotation source + # This isn't persay a strict requirment but it will make the FTP confusing as hell if we allow it. + conflicting_combination = ( + meta_session.query(Genome.genome_id) + .join(Assembly, Genome.assembly_id == Assembly.assembly_id) + .filter( + Assembly.accession == assembly_accession, + Genome.provider_name != provider_name, + Genome.annotation_source == annotation_source, + ) + ) + + test_for_conflicting = meta_session.query(conflicting_combination.exists()).scalar() + if test_for_conflicting: + raise exceptions.MetaException( + f"Genebuild already exists for assembly {assembly_accession} " + f"existing genebuild with different provider uses an annotation source of '{annotation_source}'. " + "Please use a different one." 
+ ) genebuild_label = f"{assembly_accession}_{provider_name}_{last_geneset_update}" if source is None: @@ -890,14 +908,8 @@ def _create_genebuild(self, species_id, meta_session, source=None): dataset_source = source dataset_type = meta_session.query(DatasetType).filter(DatasetType.name == "genebuild").first() - - # Get all genebuild attributes from the core database attributes = self.get_meta_list_from_prefix_meta_key(species_id, "genebuild.") - - # Use genebuild_date as the version (more meaningful than arbitrary version numbers) dataset_version = last_geneset_update - - # Create new dataset dataset_factory = DatasetFactory(self.metadata_uri) (dataset_uuid, genebuild_dataset, genebuild_dataset_attributes, new_genome_dataset) = dataset_factory.create_dataset( diff --git a/src/tests/databases/ensembl_genome_metadata.db b/src/tests/databases/ensembl_genome_metadata.db index ca2bee236ce39ca411e07fe029d6d6530d638152..1f8767875b9584c2820d2b625f63438be02a0499 100644 GIT binary patch delta 719 zcmZo@5NK!+m>?~v!@$7c0K_mLIZ?-$QDM$JYmkf^RXgSn$QP_$iijtoPiPNNA})RMu{(K3|L z-PtkTz{t?j)X-q^M>+AyVPds{3_z{yC|aM0%@F{KW}^sB5C;h=r+@{G8H^o`ftJ`v zOaoe?nTV_sY=@zRse!45*<=SXvB@lwRY0B0F+2>Sx{8Vnl8%Z%oqdvJ{EWr^AdQOR z4C0RBKrvydGy%p&W(^dDol<2$-8w>GE1=$(%r8BkpP_~ytQ!_Kd!>P4)2Pe|RtR^X z4A_Av{zG!05yXMAAO~u)fpx;duTK^n2&^cAR&roL78Jo}a&v%=VFZc6BWj|2J|p8M z1;z>jNJ?z-n@o9-duY)-!gWy3$j@Jy^i7wY3P`P@$V z%~_j|v#{`aFi1^w6ys`^X9OG3Cb&6hA1mYJ-2Ia`zug(l!otA7xC3a*7RE^pESp&l z{N)z}TFuHV&%kq%>jL{0R$FHI%|aJ=nKwVWE622X*5z`h&2<-Km^Z86{K>f4c!N1} R(}T7LjBO8?mOWrr002l`p_c#v delta 674 zcmZo@5NK!+m>?~v#lXPe0K_mLK2gV*QEOwue11l*O#*c+yu1ty+KRFavJ8&0p`4Su zMDhhH8zaDi(hSlJj?zFuPSG5}M$JYmkf^RXgE@nvc__!^HqkjUjXI4cU{Om3O9n^F zP!4xz$9MxHLrYUbgUKJ{#3zS})ye|3vZH7PD>Ae&F|#x=n*32tVlumUg+QZbHj1Lj z{o->38kJMPg2oKS435S?ORXiQ0aYfVs(dH`RLLB}%OI+&sK_A6;HU_6bgyI?f3ZJE zp`tj0ID?}&P)tZF4X989Ss};d4yiJMMjaurkuYye=98Y!U&9a94G)_=()mCKae@^h z93%sB5UT$sKa>GFNRth$5*}W?vLHcLR6$EQkRS`H;8VFd{56aqF+{{nkk8+wz&ODO 
z7>``Qz~J-XHRSQ;+Qa#l!;oE&&5}6_D5wpKWEjl>Vr)Kn$DDa{*5>0ZT+Nnj?BcSr zjBT!)r*53eIQiW+iOnx|CIBT&86gt3n?rZ7G76UD=fr2GD1gSH2ZZ4a21Jz!P<04vXz2><{9 From 3e86ff5f718802f6d5a4495d148fba5200a96eda Mon Sep 17 00:00:00 2001 From: danielp Date: Mon, 3 Nov 2025 15:23:24 +0000 Subject: [PATCH 11/14] Update to match Jorge's suggestions. Initial search outline --- pyproject.toml | 1 + .../production/metadata/api/adaptors/base.py | 5 - .../metadata/api/adaptors/genome.py | 43 ++- .../metadata/api/adaptors/release.py | 8 - .../metadata/api/models/taxonomy.py | 1 + .../production/metadata/api/search/search.py | 342 ++++++++++++++++++ src/tests/conftest.py | 19 +- src/tests/test_dataset_factory.py | 17 +- src/tests/test_protobuf_msg_factory.py | 6 +- src/tests/test_release_factory.py | 8 +- src/tests/test_updater.py | 9 +- 11 files changed, 399 insertions(+), 60 deletions(-) create mode 100644 src/ensembl/production/metadata/api/search/search.py diff --git a/pyproject.toml b/pyproject.toml index f7aedb38..709d109f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ dependencies = [ "duckdb-engine >= 0.17.0", "pymysql", "mysqlclient", + "pydantic" ] [project.urls] diff --git a/src/ensembl/production/metadata/api/adaptors/base.py b/src/ensembl/production/metadata/api/adaptors/base.py index 745360ee..34831b8d 100644 --- a/src/ensembl/production/metadata/api/adaptors/base.py +++ b/src/ensembl/production/metadata/api/adaptors/base.py @@ -14,11 +14,6 @@ from ensembl.production.metadata.grpc.config import cfg -##Todo: Add in OrganismAdapator. Subfunction fetches all organism in popular group. and # of genomes from distinct assemblies. 
-# Add in best genome (see doc) -# More functions for related genomes - - class BaseAdaptor: def __init__(self, metadata_uri): self.metadata_db = DBConnection(metadata_uri, pool_size=cfg.pool_size, pool_recycle=cfg.pool_recycle) diff --git a/src/ensembl/production/metadata/api/adaptors/genome.py b/src/ensembl/production/metadata/api/adaptors/genome.py index d4115510..8e2ee330 100644 --- a/src/ensembl/production/metadata/api/adaptors/genome.py +++ b/src/ensembl/production/metadata/api/adaptors/genome.py @@ -146,13 +146,30 @@ def fetch_genomes_by_assembly_name_genebuild(self, session.expire_on_commit = False return session.execute(genome_select).all() - def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organism_uuid=None, - assembly_uuid=None, assembly_accession=None, assembly_name=None, - use_default_assembly=False, biosample_id=None, production_name=None, - taxonomy_id=None, group=None, genome_group_id=None, genome_group_name=None, - genome_group_type=None, - genome_group_reference_only=False, unreleased_only=False, site_name=None, release_type=None, - release_version=None, current_only=False): + def fetch_genomes( + self, + genome_id=None, + genome_uuid=None, + genome_tag=None, + organism_uuid=None, + assembly_uuid=None, + assembly_accession=None, + assembly_name=None, + use_default_assembly=False, + biosample_id=None, + production_name=None, + taxonomy_id=None, + group=None, + genome_group_id=None, + genome_group_name=None, + genome_group_type=None, + genome_group_reference_only=False, + unreleased_only=False, + site_name=None, + release_type=None, + release_version=None, + current_only=False, + ): """ Fetches genome information based on the specified parameters. 
@@ -896,8 +913,9 @@ def fetch_assemblies_count(self, species_taxonomy_id: int, release_version: floa with self.metadata_db.session_scope() as session: return session.execute(query).scalar() - def fetch_genome_groups(self, genome_id=None, genome_uuid=None, group_type=None, - is_current=True, release_version=None): + def fetch_genome_groups( + self, genome_id=None, genome_uuid=None, group_type=None, is_current=True, release_version=None + ): """ Fetch all genome groups that a genome belongs to. @@ -914,7 +932,7 @@ def fetch_genome_groups(self, genome_id=None, genome_uuid=None, group_type=None, genome_id = check_parameter(genome_id) query = query.where(Genome.genome_id.in_(genome_id)) - if genome_uuid: + elif genome_uuid: genome_uuid = check_parameter(genome_uuid) query = query.where(Genome.genome_uuid.in_(genome_uuid)) @@ -936,8 +954,9 @@ def fetch_genome_groups(self, genome_id=None, genome_uuid=None, group_type=None, session.expire_on_commit = False return session.execute(query).scalars().all() - def fetch_genome_group_members_detailed(self, genome_group_id=None, group_name=None, - is_current=True, release_version=None): + def fetch_genome_group_members_detailed( + self, genome_group_id=None, group_name=None, is_current=True, release_version=None + ): """ Fetch genomes and their membership details for a genome group. 
diff --git a/src/ensembl/production/metadata/api/adaptors/release.py b/src/ensembl/production/metadata/api/adaptors/release.py index a2f42403..578c91a1 100644 --- a/src/ensembl/production/metadata/api/adaptors/release.py +++ b/src/ensembl/production/metadata/api/adaptors/release.py @@ -77,14 +77,10 @@ def _ensure_scalar(value): Returns: Scalar value or None """ - if value is None: - return None - # Unwrap single-element lists/tuples (pytest parametrization edge case) if isinstance(value, (list, tuple)) and len(value) == 1: value = value[0] - # If still a list/tuple, return as-is for IN clause handling return value @@ -117,7 +113,6 @@ def fetch_releases( """ release_select = db.select(EnsemblRelease).order_by(EnsemblRelease.version) - # Handle release_id parameter releases_id = check_parameter(release_id) if releases_id is not None: release_select = release_select.filter(EnsemblRelease.release_id.in_(releases_id)) @@ -135,16 +130,13 @@ def fetch_releases( release_version = float(release_version) release_select = release_select.filter(EnsemblRelease.version <= release_version) - # Filter for current releases only if current_only: release_select = release_select.filter(EnsemblRelease.is_current == 1) - # Filter by release type if release_type is not None: release_type = check_parameter(release_type) release_select = release_select.filter(EnsemblRelease.release_type.in_(release_type)) - # Filter by release label if release_label is not None: release_label = check_parameter(release_label) release_select = release_select.filter(EnsemblRelease.label.in_(release_label)) diff --git a/src/ensembl/production/metadata/api/models/taxonomy.py b/src/ensembl/production/metadata/api/models/taxonomy.py index fe5d066c..ca326032 100644 --- a/src/ensembl/production/metadata/api/models/taxonomy.py +++ b/src/ensembl/production/metadata/api/models/taxonomy.py @@ -13,6 +13,7 @@ from ensembl.ncbi_taxonomy.models import NCBITaxaName from ensembl.ncbi_taxonomy.models import NCBITaxaNode 
+__all__ = ['EnsemblTaxaNode', 'EnsemblTaxaName', 'NCBITaxaName', 'NCBITaxaNode'] class EnsemblTaxaNode(NCBITaxaNode): __tablename__ = 'ncbi_taxa_node' diff --git a/src/ensembl/production/metadata/api/search/search.py b/src/ensembl/production/metadata/api/search/search.py new file mode 100644 index 00000000..f40eb1b3 --- /dev/null +++ b/src/ensembl/production/metadata/api/search/search.py @@ -0,0 +1,342 @@ +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, List + +from pydantic import BaseModel +from sqlalchemy.orm import Session, joinedload + +from ensembl.production.metadata.api.models import ( + Genome, Dataset, DatasetAttribute, + EnsemblRelease, GenomeRelease, GenomeDataset, Attribute, + ReleaseStatus +) + + +# ============================================================================ +# PYDANTIC SCHEMAS +# ============================================================================ + +class GenomeSearchDocument(BaseModel): + """Schema for genome search indexing""" + + # Direct fields from Genome/Organism/Assembly + genome_uuid: str + common_name: Optional[str] = None + scientific_name: str + strain_type: Optional[str] = None + strain: Optional[str] = None + assembly_name: str + accession: str + url_name: Optional[str] = None + tol_id: Optional[str] = None + is_reference: bool + species_taxonomy_id: int + taxonomy_id: int + scientific_parlance_name: Optional[str] = None + organism_id: int + rank: int = 0 + + 
# Complex derived fields from datasets + contig_n50: int + coding_genes: int + has_variation: bool = False + has_regulation: bool = False + genebuild_provider: str + genebuild_method_display: str + + # Release information + release_type: str + release_label: str + release_id: int + + class Config: + from_attributes = True + + +# ============================================================================ +# QUERY HELPER CLASS +# ============================================================================ + +class GenomeSearchQueryHelper: + """Handles complex queries for extracting genome search data""" + + def __init__(self, session: Session): + self.session = session + + def _get_dataset_attribute_value( + self, + genome_id: int, + release_id: int, + release_type: str, + dataset_type_name: str, + attribute_name: str + ) -> Optional[str]: + """ + Get dataset attribute value with complex release logic + + If release is integrated: use dataset with matching release_id + If release is partial: use is_current dataset + """ + # Build the query + query = ( + self.session.query(DatasetAttribute.value) + .join(Attribute, DatasetAttribute.attribute_id == Attribute.attribute_id) + .join(Dataset, DatasetAttribute.dataset_id == Dataset.dataset_id) + .join(Dataset.dataset_type) + .join(GenomeDataset, Dataset.dataset_id == GenomeDataset.dataset_id) + .filter( + GenomeDataset.genome_id == genome_id, + Dataset.dataset_type.has(name=dataset_type_name), + Attribute.name == attribute_name + ) + ) + + # Apply release-specific filtering + if release_type == 'integrated': + query = query.filter(GenomeDataset.release_id == release_id) + else: # partial + query = query.filter(GenomeDataset.is_current == 1) + + result = query.first() + return result[0] if result else None + + def _has_dataset_type( + self, + genome_id: int, + release_id: int, + release_type: str, + dataset_type_name: str + ) -> bool: + """Check if genome has a dataset of specific type""" + query = ( + 
self.session.query(GenomeDataset) + .join(Dataset, GenomeDataset.dataset_id == Dataset.dataset_id) + .join(Dataset.dataset_type) + .filter( + GenomeDataset.genome_id == genome_id, + Dataset.dataset_type.has(name=dataset_type_name) + ) + ) + + if release_type == 'integrated': + query = query.filter(GenomeDataset.release_id == release_id) + else: # partial + query = query.filter(GenomeDataset.is_current == 1) + + return self.session.query(query.exists()).scalar() + + def _get_genebuild_provider( + self, + genome: Genome, + release_id: int, + release_type: str + ) -> Optional[str]: + """Get genebuild provider with fallback logic""" + # Try to get from dataset attribute first + provider = self._get_dataset_attribute_value( + genome.genome_id, + release_id, + release_type, + 'genebuild', + 'genebuild.provider_name_display' + ) + + # Fallback to genome.provider_name + return provider if provider else genome.provider_name + + def extract_genome_data( + self, + genome: Genome, + release: EnsemblRelease + ) -> dict: + """Extract all required data from genome for given release""" + + return { + # Direct fields + 'genome_uuid': genome.genome_uuid, + 'common_name': genome.organism.common_name, + 'scientific_name': genome.organism.scientific_name, + 'strain_type': genome.organism.strain_type, + 'strain': genome.organism.strain, + 'assembly_name': genome.assembly.name, + 'accession': genome.assembly.accession, + 'url_name': genome.url_name, + 'tol_id': genome.organism.tol_id, + 'is_reference': bool(genome.assembly.is_reference), + 'species_taxonomy_id': genome.organism.species_taxonomy_id, + 'scientific_parlance_name': genome.organism.scientific_parlance_name, + 'organism_id': genome.organism_id, + 'rank': genome.organism.rank or 0, + + # Complex dataset fields + 'contig_n50': self._get_dataset_attribute_value( + genome.genome_id, release.release_id, release.release_type, + 'assembly', 'assembly.stats.contig_n50' + ), + 'coding_genes': self._get_dataset_attribute_value( + 
genome.genome_id, release.release_id, release.release_type, + 'genebuild', 'genebuild.stats.coding_genes' + ), + 'has_variation': self._has_dataset_type( + genome.genome_id, release.release_id, release.release_type, + 'variation' + ), + 'has_regulation': self._has_dataset_type( + genome.genome_id, release.release_id, release.release_type, + 'regulatory_features' + ), + 'genebuild_provider': self._get_genebuild_provider( + genome, release.release_id, release.release_type + ), + 'genebuild_method_display': self._get_dataset_attribute_value( + genome.genome_id, release.release_id, release.release_type, + 'genebuild', 'genebuild.method_display' + ), + + # Release fields + 'release_type': release.release_type, + 'release_label': release.label, + 'release_id': release.release_id, + } + + +# ============================================================================ +# MAIN SERVICE CLASS +# ============================================================================ + +class GenomeSearchIndexer: + """Service for generating genome search documents""" + + def __init__(self, session: Session): + self.session = session + self.query_helper = GenomeSearchQueryHelper(session) + + def _get_relevant_release(self, genome: Genome) -> Optional[EnsemblRelease]: + """ + Determine which release to use for a genome. + Prefer partial if exists, otherwise use integrated. 
+ """ + releases = ( + self.session.query(EnsemblRelease) + .join(GenomeRelease) + .filter( + GenomeRelease.genome_id == genome.genome_id, + EnsemblRelease.status == ReleaseStatus.RELEASED + ) + .all() + ) + + # Check for partial release + partial_releases = [r for r in releases if r.release_type == 'partial'] + if partial_releases: + return partial_releases # Should only be one, but return list for consistency + + # Return all integrated releases + integrated_releases = [r for r in releases if r.release_type == 'integrated'] + return integrated_releases if integrated_releases else None + + def get_released_genomes(self) -> List[Genome]: + """Get all genomes that are released""" + return ( + self.session.query(Genome) + .join(GenomeRelease) + .join(EnsemblRelease) + .filter( + EnsemblRelease.status == ReleaseStatus.RELEASED, + Genome.suppressed == 0 + ) + .options( + joinedload(Genome.organism), + joinedload(Genome.assembly), + joinedload(Genome.genome_releases).joinedload(GenomeRelease.ensembl_release) + ) + .distinct() + .all() + ) + + def create_search_documents( + self, + genome: Genome + ) -> List[GenomeSearchDocument]: + """ + Create search documents for a genome. + Returns list because a genome can be in multiple integrated releases. 
+ """ + releases = self._get_relevant_release(genome) + + if not releases: + return [] + + # Ensure releases is a list + if not isinstance(releases, list): + releases = [releases] + + documents = [] + for release in releases: + genome_data = self.query_helper.extract_genome_data(genome, release) + documents.append(GenomeSearchDocument(**genome_data)) + + return documents + + def generate_all_search_documents(self) -> List[GenomeSearchDocument]: + """Generate search documents for all released genomes""" + genomes = self.get_released_genomes() + all_documents = [] + + for genome in genomes: + documents = self.create_search_documents(genome) + all_documents.extend(documents) + + return all_documents + + def generate_search_documents_as_dicts(self) -> List[dict]: + """Generate search documents as dictionaries for indexing""" + documents = self.generate_all_search_documents() + return [doc.model_dump() for doc in documents] + + +# ============================================================================ +# USAGE EXAMPLES +# ============================================================================ + +def index_genomes_for_search(session: Session): + """Main entry point for generating search index data""" + indexer = GenomeSearchIndexer(session) + + # Get all documents as dicts ready for search indexing + search_documents = indexer.generate_search_documents_as_dicts() + + # Send to your search service (Elasticsearch, Solr, etc.) 
+ # send_to_search_index(search_documents) + + return search_documents + + +def index_single_genome(session: Session, genome_uuid: str): + """Index a specific genome""" + indexer = GenomeSearchIndexer(session) + + genome = ( + session.query(Genome) + .filter(Genome.genome_uuid == genome_uuid) + .options( + joinedload(Genome.organism), + joinedload(Genome.assembly) + ) + .first() + ) + + if not genome: + raise ValueError(f"Genome {genome_uuid} not found") + + documents = indexer.create_search_documents(genome) + return [doc.model_dump() for doc in documents] diff --git a/src/tests/conftest.py b/src/tests/conftest.py index 492f78f5..45fb43ac 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -37,25 +37,22 @@ def test_dbs(request): src_path = db_config["src"] db_name = src_path.name - # Use pre-converted SQLite .db files with temporary copies for isolation sqlite_file = src_path.parent / f"{db_name}.db" - if not sqlite_file.exists(): - raise FileNotFoundError( - f"SQLite database not found: {sqlite_file}\n" - f"Please convert it first using your conversion script." - ) - - # Create temporary copy to ensure test isolation temp_dir = tempfile.mkdtemp(prefix=f"pytest_{db_name}_") temp_db_file = Path(temp_dir) / f"{db_name}_test.db" print(f"\n>>> Using SQLite database: {sqlite_file}") print(f" (temporary copy: {temp_db_file})") - shutil.copy2(sqlite_file, temp_db_file) + try: + shutil.copy2(sqlite_file, temp_db_file) + except FileNotFoundError as exc: + raise FileNotFoundError( + f"SQLite database not found: {sqlite_file}\n" + f"Please convert it first using your conversion script." 
+ ) from exc - # Create connection to temporary copy db_url = f"sqlite:///{temp_db_file}" test_databases[db_name] = type("TestDB", (object,), { "dbc": DBConnection(db_url), @@ -66,12 +63,10 @@ def test_dbs(request): yield test_databases - # Cleanup - close SQLite connections and remove temporary files for db_name, test_db in test_databases.items(): if hasattr(test_db.dbc, 'dispose'): test_db.dbc.dispose() - # Remove temporary files and directories for temp_file, temp_dir in temp_resources: try: if temp_file.exists(): diff --git a/src/tests/test_dataset_factory.py b/src/tests/test_dataset_factory.py index 856f5bd9..b04eed88 100644 --- a/src/tests/test_dataset_factory.py +++ b/src/tests/test_dataset_factory.py @@ -101,14 +101,15 @@ def test_genebuild_workflow(self, test_dbs, dataset_factory): genebuild_uuid = 'a3352834-cea1-40aa-9dad-99981620c36b' # Test children creation with metadata_db.test_session_scope() as session: - genome = Genome(production_name="new_grch37", - assembly_id=40, - created=func.now(), - organism_id=9, - annotation_source="test", - genebuild_date="2026-04", - provider_name="test" - ) + genome = Genome( + production_name="new_grch37", + assembly_id=40, + created=func.now(), + organism_id=9, + annotation_source="test", + genebuild_date="2026-04", + provider_name="test" + ) session.add(genome) genebuild = Dataset( dataset_type_id=2, diff --git a/src/tests/test_protobuf_msg_factory.py b/src/tests/test_protobuf_msg_factory.py index d7dbc686..f2c3836f 100644 --- a/src/tests/test_protobuf_msg_factory.py +++ b/src/tests/test_protobuf_msg_factory.py @@ -109,12 +109,12 @@ def test_create_stats_by_organism_uuid(self, genome_conn): output = json_format.MessageToJson(msg_factory.create_stats_by_genome_uuid(input_data)[0]) output_dict = json.loads(output) - assert output_dict['genomeUuid'] == "a73351f7-93e7-11ec-a39d-005056b38ce3" + assert output_dict["genomeUuid"] == "a73351f7-93e7-11ec-a39d-005056b38ce3" # Don't assume order - search for the specific 
statistic - stats = output_dict['statistics'] + stats = output_dict["statistics"] assembly_accession_stat = next( - (s for s in stats if s['name'] == 'assembly.accession'), + (s for s in stats if s["name"] == "assembly.accession"), None ) diff --git a/src/tests/test_release_factory.py b/src/tests/test_release_factory.py index 0eee9277..182dd6e5 100644 --- a/src/tests/test_release_factory.py +++ b/src/tests/test_release_factory.py @@ -46,10 +46,8 @@ def test_init_release_default(self, test_dbs) -> None: label = "2028-09-11" date = datetime.strptime(label, "%Y-%m-%d").date() - try: - factory.init_release(label=label) - except Exception as e: - pytest.fail(f"Unexpected exception: {e}") + factory.init_release(label=label) + with metadata_db.session_scope() as session: release = session.query(EnsemblRelease).filter(EnsemblRelease.version == expected_version).one_or_none() @@ -211,7 +209,7 @@ class TestFactoryUtils: def test_get_genome_sets_by_assembly_and_provider(self, test_dbs) -> None: """ - Test `get_genome_sets_by_assembly_and_provider. + Test `get_genome_sets_by_assembly_and_provider'. Pretty bad test. We haven't populated the metadata here with an updated genome so it just returns an empty set. 
""" metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) diff --git a/src/tests/test_updater.py b/src/tests/test_updater.py index c702be88..e5100d6a 100644 --- a/src/tests/test_updater.py +++ b/src/tests/test_updater.py @@ -33,7 +33,7 @@ {'src': Path(__file__).parent / "databases/core_5"}, {'src': Path(__file__).parent / "databases/core_6"}, {'src': Path(__file__).parent / "databases/core_7"}, - {'src': Path(__file__).parent / "databases/core_8"} + {'src': Path(__file__).parent / "databases/core_8"}, ]], indirect=True) class TestUpdater: @@ -114,7 +114,6 @@ def test_update_assembly(self, test_dbs): test_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - # Get the genome_uuid that was just inserted core_3_db = DBConnection(test_dbs['core_3'].dbc.url) with core_3_db.session_scope() as core_session: inserted_meta = core_session.query(Meta).filter( @@ -150,19 +149,15 @@ def test_update_geneset(self, test_dbs): ).first() inserted_genome_uuid = inserted_meta.meta_value - # Now query the metadata database for THIS SPECIFIC genome metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: - # Get the genome that was just created genome = session.query(Genome).filter( Genome.genome_uuid == inserted_genome_uuid ).one() - - # Get the genebuild dataset for THIS genome genebuild_dataset = session.query(Dataset).join(GenomeDataset).join(Genome).filter( Genome.genome_uuid == inserted_genome_uuid, Dataset.name == "genebuild" - ).one() # ← ADD THIS! + ).one() assert genebuild_dataset is not None From 3325c609283da7b135336d6c58a35ae8a64127c8 Mon Sep 17 00:00:00 2001 From: danielp Date: Thu, 6 Nov 2025 12:48:04 +0000 Subject: [PATCH 12/14] Update to match Jorge's suggestions. 
Initial search outline --- .../production/metadata/updater/core.py | 72 +- src/tests/test_exports.py | 1101 ----------------- src/tests/test_exports_changelog.py | 489 ++++++++ src/tests/test_exports_json.py | 321 +++++ src/tests/test_exports_stats.py | 342 +++++ src/tests/test_release_factory.py | 2 +- src/tests/test_scripts.py | 41 +- 7 files changed, 1212 insertions(+), 1156 deletions(-) delete mode 100644 src/tests/test_exports.py create mode 100644 src/tests/test_exports_changelog.py create mode 100644 src/tests/test_exports_json.py create mode 100644 src/tests/test_exports_stats.py diff --git a/src/ensembl/production/metadata/updater/core.py b/src/ensembl/production/metadata/updater/core.py index e612243d..c1f53b73 100644 --- a/src/ensembl/production/metadata/updater/core.py +++ b/src/ensembl/production/metadata/updater/core.py @@ -44,7 +44,8 @@ def __init__(self, db_uri, metadata_uri, taxonomy_uri, release=None): def _load_meta_dict(self): """Load metadata into meta_dict from the database. - Stores all values for each meta_key as a list to handle potential duplicates.""" + Stores all values for each meta_key as a list to handle potential duplicates. 
+ """ with self.db.session_scope() as session: results = session.query(Meta).filter(Meta.meta_value.isnot(None), Meta.meta_value.notin_(['', 'Null', 'NULL'])).all() @@ -95,9 +96,7 @@ def get_meta_single_meta_key(self, species_id, parameter): if species_meta is None: return None - values = species_meta.get(parameter) - if values is None: - return None + values = species_meta.get(parameter, [None]) if len(values) > 1: raise exceptions.MetaException( @@ -235,8 +234,9 @@ def process_species(self, species_id, meta_session): organism = self.get_or_new_organism(species_id, meta_session) assembly, assembly_dataset, assembly_dataset_attributes, assembly_sequences, dataset_source = self.get_or_new_assembly( species_id, meta_session) - genebuild_dataset, genebuild_dataset_attributes = self._create_genebuild(species_id, meta_session, - dataset_source) + genebuild_dataset, genebuild_dataset_attributes = self._create_genebuild( + species_id, meta_session, dataset_source + ) # Checking for an existing genome uuid: old_genome_uuid = self.get_meta_single_meta_key(species_id, "genome.genome_uuid") @@ -352,7 +352,7 @@ def new_genome(self, meta_session, species_id, organism, assembly, assembly_data production_name=production_name, url_name=url_name, annotation_source=annotation_source, - provider_name=provider_name + provider_name=provider_name, ) logger.debug(f"Assigning genome {new_genome.genome_uuid} to {planned_release.version}") meta_session.add(new_genome) @@ -406,7 +406,7 @@ def _create_genome_group_members(self, meta_session, species_id, new_genome, pla genome_group_names = self.get_meta_all_values(species_id, "genome.genome_group") if not genome_group_names: - return + return None for group_name in genome_group_names: # Check if the genome group exists @@ -425,7 +425,7 @@ def _create_genome_group_members(self, meta_session, species_id, new_genome, pla genome_group=genome_group, ensembl_release=planned_release, is_current=1, - is_reference=0 + is_reference=0, ) 
meta_session.add(genome_group_member) logger.info(f"Added genome {new_genome.genome_uuid} to genome group '{group_name}'") @@ -471,7 +471,7 @@ def get_or_new_organism(self, species_id, meta_session): strain=self.get_meta_single_meta_key(species_id, "organism.strain"), strain_type=self.get_meta_single_meta_key(species_id, "organism.type"), scientific_parlance_name=self.get_meta_single_meta_key(species_id, "organism.scientific_parlance_name"), - tol_id=tol_id + tol_id=tol_id, ) # Query the metadata database to find if an Organism with the same Ensembl name already exists. @@ -543,16 +543,20 @@ def get_assembly_sequences(self, species_id, assembly): synonym_dict = defaultdict(list) accession_info = defaultdict( lambda: { - "length": None, "location": None, "chromosomal": None, - "karyotype_rank": None, "type": None, "is_circular": 0 + "length": None, + "location": None, + "chromosomal": None, + "karyotype_rank": None, + "type": None, + "is_circular": 0, }) location_mapping = { - 'nuclear_chromosome': 'SO:0000738', - 'mitochondrial_chromosome': 'SO:0000737', - 'chloroplast_chromosome': 'SO:0000745', - 'apicoplast_chromosome': 'SO:0001259', - None: 'SO:0000738', + "nuclear_chromosome": "SO:0000738", + "mitochondrial_chromosome": "SO:0000737", + "chloroplast_chromosome": "SO:0000745", + "apicoplast_chromosome": "SO:0001259", + None: "SO:0000738", } for seq_region_name, seq_region_length, coord_system_name, synonym, is_circular in results: @@ -603,6 +607,13 @@ def get_assembly_sequences(self, species_id, assembly): return assembly_sequences, sequence_aliases + ENA_ACCESSION_PATTERNS = [ + re.compile(r'^[A-Z]{1}[0-9]{5}\.[0-9]+$'), + re.compile(r'^[A-Z]{2}[0-9]{6}\.[0-9]+$'), + re.compile(r'^[A-Z]{2}[0-9]{8}$'), + re.compile(r'^[A-Z]{4}[0-9]{2}S?[0-9]{6,8}$'), + re.compile(r'^[A-Z]{6}[0-9]{2}S?[0-9]{7,9}$'), + ] def _is_valid_ena_accession(self, identifier): """ Check if an identifier matches ENA sequence identifier rules for annotated sequences. 
@@ -617,15 +628,7 @@ def _is_valid_ena_accession(self, identifier): Returns: bool: True if identifier matches any pattern """ - patterns = [ - r'^[A-Z]{1}[0-9]{5}\.[0-9]+$', - r'^[A-Z]{2}[0-9]{6}\.[0-9]+$', - r'^[A-Z]{2}[0-9]{8}$', - r'^[A-Z]{4}[0-9]{2}S?[0-9]{6,8}$', - r'^[A-Z]{6}[0-9]{2}S?[0-9]{7,9}$', - ] - - return any(re.match(pattern, identifier) for pattern in patterns) + return any(pattern.match(identifier) for pattern in ENA_ACCESSION_PATTERNS) def _get_valid_accession(self, seq_region_name, synonyms): """ @@ -704,7 +707,7 @@ def get_or_new_assembly(self, species_id, meta_session, source=None): dataset_source) # No exact match found - either error or force new - if force_new_uuid == "1" or force_new_uuid == 1: + if int(force_new_uuid) == 1: return self._create_new_assembly(species_id, meta_session, dataset_source, assembly_accession) # Return error describing discrepancies @@ -722,15 +725,11 @@ def _find_matching_assembly(self, assemblies, incoming_names, incoming_count): # Filter to assemblies with matching count count_matches = [a for a in assemblies if len(a.assembly_sequences) == incoming_count] - if not count_matches: - return None - # From those, find one with matching names for assembly in count_matches: existing_names = {seq.name for seq in assembly.assembly_sequences} if existing_names == incoming_names: return assembly - return None def _get_incoming_sequence_names(self, species_id): @@ -801,7 +800,8 @@ def _attach_to_existing_assembly(self, assembly, meta_session, assembly_accessio if assembly_dataset is None: raise exceptions.MetadataUpdateException( - f"Assembly {assembly_accession} exists but no valid (non-faulty) assembly dataset found") + f"Assembly {assembly_accession} exists but no valid (non-faulty) assembly dataset found" + ) assembly_dataset_attributes = assembly_dataset.dataset_attributes assembly_sequences = assembly.assembly_sequences @@ -815,9 +815,9 @@ def _create_new_assembly(self, species_id, meta_session, dataset_source, 
assembl with self.db.session_scope() as session: level = (session.execute(db.select(CoordSystem.name).filter( CoordSystem.species_id == species_id).order_by(CoordSystem.rank)).all())[0][0] - accession_body = self.get_meta_single_meta_key(species_id, - "assembly.accession_body") if self.get_meta_single_meta_key( - species_id, "assembly.accession_body") else "INSDC" + accession_body = self.get_meta_single_meta_key(species_id, "assembly.accession_body") + if not accession_body: + accession_body = "INSDC" assembly = Assembly( ucsc_name=self.get_meta_single_meta_key(species_id, "assembly.ucsc_alias"), @@ -828,7 +828,7 @@ def _create_new_assembly(self, species_id, meta_session, dataset_source, assembl assembly_default=self.get_meta_single_meta_key(species_id, "assembly.default"), created=func.now(), assembly_uuid=str(uuid.uuid4()), - is_reference=is_reference + is_reference=is_reference, ) dataset_factory = DatasetFactory(self.metadata_uri) diff --git a/src/tests/test_exports.py b/src/tests/test_exports.py deleted file mode 100644 index 2abcbe2f..00000000 --- a/src/tests/test_exports.py +++ /dev/null @@ -1,1101 +0,0 @@ -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import csv -import json -from pathlib import Path - -import pytest - -from ensembl.production.metadata.api.exports.changelog_generator import ChangelogGenerator -from ensembl.production.metadata.api.exports.ftp_index import FTPMetadataExporter -from ensembl.production.metadata.api.exports.stats_generator import StatsGenerator -from ensembl.production.metadata.api.models import Genome, ReleaseStatus, EnsemblRelease - -db_directory = Path(__file__).parent / 'databases' -db_directory = db_directory.resolve() - - -@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / "databases/ensembl_genome_metadata"}, - {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, - ]], indirect=True) -class TestStatsGenerator: - """Test suite for StatsGenerator class.""" - - def test_init_valid_uri(self, test_dbs): - """Test StatsGenerator initialization with valid metadata URI.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = StatsGenerator(metadata_uri) - assert generator.metadata_db is not None - assert generator.output_path == Path.cwd() - - def test_init_with_output_path(self, test_dbs, tmp_path): - """Test StatsGenerator initialization with custom output path.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_path = tmp_path / "test_output" - generator = StatsGenerator(metadata_uri, output_path=str(output_path)) - assert generator.metadata_db is not None - assert generator.output_path == output_path - assert output_path.exists() - - def test_init_invalid_uri_empty(self, test_dbs): - """Test StatsGenerator initialization fails with empty URI.""" - with pytest.raises(ValueError) as excinfo: - StatsGenerator("") - assert "metadata_uri must be a non-empty string" in str(excinfo.value) - - def test_init_invalid_uri_none(self, test_dbs): - """Test StatsGenerator initialization fails with None URI.""" - with pytest.raises(ValueError) as excinfo: - StatsGenerator(None) - assert "metadata_uri must be a non-empty 
string" in str(excinfo.value) - - def test_init_invalid_uri_not_string(self, test_dbs): - """Test StatsGenerator initialization fails with non-string URI.""" - with pytest.raises(ValueError) as excinfo: - StatsGenerator(123) - assert "metadata_uri must be a non-empty string" in str(excinfo.value) - - def test_get_partial_data(self, test_dbs): - """Test get_partial_data returns correct structure and values.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = StatsGenerator(metadata_uri) - partial_data = generator.get_partial_data() - assert isinstance(partial_data, list) - if len(partial_data) > 0: - first_release = partial_data[0] - required_keys = [ - 'release', 'new_genomes', 'total_genomes', - 'new_assemblies', 'total_assemblies', - 'new_variation_datasets', 'total_variation_datasets', - 'new_regulation_datasets', 'total_regulation_datasets' - ] - for key in required_keys: - assert key in first_release, f"Missing key: {key}" - assert isinstance(first_release['release'], str) - assert isinstance(first_release['new_genomes'], int) - assert isinstance(first_release['total_genomes'], int) - assert isinstance(first_release['new_assemblies'], int) - assert isinstance(first_release['total_assemblies'], int) - assert isinstance(first_release['new_variation_datasets'], int) - assert isinstance(first_release['total_variation_datasets'], int) - assert isinstance(first_release['new_regulation_datasets'], int) - assert isinstance(first_release['total_regulation_datasets'], int) - # Verify cumulative totals are non-decreasing - for i in range(1, len(partial_data)): - assert partial_data[i]['total_genomes'] >= partial_data[i - 1]['total_genomes'] - assert partial_data[i]['total_assemblies'] >= partial_data[i - 1]['total_assemblies'] - assert partial_data[i]['total_variation_datasets'] >= partial_data[i - 1]['total_variation_datasets'] - assert partial_data[i]['total_regulation_datasets'] >= partial_data[i - 1]['total_regulation_datasets'] - - assert 
len(partial_data) == 2 - assert partial_data[0]['release'] == '2020-10-18' - assert partial_data[0]['new_genomes'] == 3 - - def test_get_partial_data_specific_values(self, test_dbs): - """Test get_partial_data returns specific expected values from test database.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = StatsGenerator(metadata_uri) - partial_data = generator.get_partial_data() - - assert len(partial_data) == 2 - if len(partial_data) >= 1: - assert partial_data[0]['release'] == '2020-10-18' - assert partial_data[0]['new_genomes'] == 3 - assert partial_data[0]['total_genomes'] == 3 - - def test_get_integrated_data(self, test_dbs): - """Test get_integrated_data returns correct structure and values.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = StatsGenerator(metadata_uri) - integrated_data = generator.get_integrated_data() - - assert isinstance(integrated_data, list) - - # Don't actually have any integrated data in the test db. - # TODO: Add some integrated data. 
- if len(integrated_data) > 0: - first_release = integrated_data[0] - required_keys = [ - 'release', 'genomes', 'assemblies', - 'variation_datasets', 'regulation_datasets' - ] - for key in required_keys: - assert key in first_release, f"Missing key: {key}" - assert isinstance(first_release['release'], str) - assert isinstance(first_release['genomes'], int) - assert isinstance(first_release['assemblies'], int) - assert isinstance(first_release['variation_datasets'], int) - assert isinstance(first_release['regulation_datasets'], int) - - assert len(integrated_data) == 0 - assert integrated_data[0]['release'] == '2025-01' - assert integrated_data[0]['genomes'] == 12 - - # def test_get_integrated_data_specific_values(self, test_dbs): - # """Test get_integrated_data returns specific expected values from test database.""" - # metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - # generator = StatsGenerator(metadata_uri) - # - # integrated_data = generator.get_integrated_data() - # - # assert len(integrated_data) == 2 - # if len(integrated_data) >= 1: - # assert integrated_data[0]['release'] == '112' - # assert integrated_data[0]['genomes'] == 50 - # assert integrated_data[0]['assemblies'] == 45 - - def test_count_datasets(self, test_dbs): - """Test _count_datasets returns correct count for a specific release and dataset type.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = StatsGenerator(metadata_uri) - with generator.metadata_db.session_scope() as session: - release_id = 1 - variation_count = generator._count_datasets(session, release_id, 'variation') - assert variation_count == 3 - - regulation_count = generator._count_datasets(session, release_id, 'regulatory_features') - assert regulation_count == 0 - pass - - def test_count_and_get_dataset_ids(self, test_dbs): - """Test _count_and_get_dataset_ids returns correct count and IDs.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = 
StatsGenerator(metadata_uri) - - with generator.metadata_db.session_scope() as session: - release_id = 1 - count, dataset_ids = generator._count_and_get_dataset_ids( - session, release_id, 'variation' - ) - - assert isinstance(count, int) - assert isinstance(dataset_ids, set) - assert count == len(dataset_ids) - assert count == 3 - pass - - def test_export_to_csv(self, test_dbs, tmp_path): - """Test export_to_csv creates files with correct structure.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_path = tmp_path / "csv_output" - generator = StatsGenerator(metadata_uri, output_path=str(output_path)) - - # Create sample data - partial_data = [ - { - 'release': 'R1', - 'new_genomes': 10, - 'total_genomes': 10, - 'new_assemblies': 8, - 'total_assemblies': 8, - 'new_variation_datasets': 5, - 'total_variation_datasets': 5, - 'new_regulation_datasets': 3, - 'total_regulation_datasets': 3, - } - ] - - integrated_data = [ - { - 'release': 'R1', - 'genomes': 10, - 'assemblies': 8, - 'variation_datasets': 5, - 'regulation_datasets': 3, - } - ] - - generator.export_to_csv(partial_data, integrated_data) - - partial_file = output_path / 'stats.partial.csv' - integrated_file = output_path / 'stats.integrated.csv' - assert partial_file.exists() - assert integrated_file.exists() - with open(partial_file, 'r') as f: - reader = csv.DictReader(f) - rows = list(reader) - assert len(rows) == 1 - assert rows[0]['release'] == 'R1' - assert rows[0]['new_genomes'] == '10' - assert rows[0]['total_genomes'] == '10' - with open(integrated_file, 'r') as f: - reader = csv.DictReader(f) - rows = list(reader) - assert len(rows) == 1 - assert rows[0]['release'] == 'R1' - assert rows[0]['genomes'] == '10' - assert rows[0]['assemblies'] == '8' - - def test_export_to_csv_sorting(self, test_dbs, tmp_path): - """Test export_to_csv sorts data by release label.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_path = tmp_path / "csv_output_sorted" - 
generator = StatsGenerator(metadata_uri, output_path=str(output_path)) - partial_data = [ - {'release': 'R3', 'new_genomes': 30, 'total_genomes': 60, - 'new_assemblies': 20, 'total_assemblies': 50, - 'new_variation_datasets': 10, 'total_variation_datasets': 30, - 'new_regulation_datasets': 5, 'total_regulation_datasets': 15}, - {'release': 'R1', 'new_genomes': 10, 'total_genomes': 10, - 'new_assemblies': 8, 'total_assemblies': 8, - 'new_variation_datasets': 5, 'total_variation_datasets': 5, - 'new_regulation_datasets': 3, 'total_regulation_datasets': 3}, - {'release': 'R2', 'new_genomes': 20, 'total_genomes': 30, - 'new_assemblies': 12, 'total_assemblies': 20, - 'new_variation_datasets': 5, 'total_variation_datasets': 10, - 'new_regulation_datasets': 2, 'total_regulation_datasets': 5}, - ] - - generator.export_to_csv(partial_data, []) - partial_file = output_path / 'stats.partial.csv' - assert partial_file.exists() - - with open(partial_file, 'r') as f: - reader = csv.DictReader(f) - rows = list(reader) - assert len(rows) == 3 - assert rows[0]['release'] == 'R1' - assert rows[1]['release'] == 'R2' - assert rows[2]['release'] == 'R3' - - def test_export_to_csv_empty_data(self, test_dbs, tmp_path): - """Test export_to_csv handles empty data correctly.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_path = tmp_path / "csv_output_empty" - generator = StatsGenerator(metadata_uri, output_path=str(output_path)) - generator.export_to_csv([], []) - - partial_file = output_path / 'stats.partial.csv' - integrated_file = output_path / 'stats.integrated.csv' - - assert partial_file.exists() - assert integrated_file.exists() - with open(partial_file, 'r') as f: - reader = csv.DictReader(f) - rows = list(reader) - assert len(rows) == 0 - - with open(integrated_file, 'r') as f: - reader = csv.DictReader(f) - rows = list(reader) - assert len(rows) == 0 - - def test_generate_integration(self, test_dbs, tmp_path): - """Test generate method integrates all 
components correctly.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_path = tmp_path / "generate_output" - generator = StatsGenerator(metadata_uri, output_path=str(output_path)) - generator.generate() - partial_file = output_path / 'stats.partial.csv' - integrated_file = output_path / 'stats.integrated.csv' - - assert partial_file.exists() - assert integrated_file.exists() - with open(partial_file, 'r') as f: - reader = csv.DictReader(f) - assert reader.fieldnames is not None - partial_fieldnames = [ - 'release', 'new_genomes', 'total_genomes', - 'new_assemblies', 'total_assemblies', - 'new_variation_datasets', 'total_variation_datasets', - 'new_regulation_datasets', 'total_regulation_datasets' - ] - assert reader.fieldnames == partial_fieldnames - - with open(integrated_file, 'r') as f: - reader = csv.DictReader(f) - assert reader.fieldnames is not None - integrated_fieldnames = [ - 'release', 'genomes', 'assemblies', - 'variation_datasets', 'regulation_datasets' - ] - assert reader.fieldnames == integrated_fieldnames - - def test_partial_data_ordering(self, test_dbs): - """Test that partial data is returned in correct order by release label.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = StatsGenerator(metadata_uri) - partial_data = generator.get_partial_data() - if len(partial_data) > 1: - release_labels = [item['release'] for item in partial_data] - assert release_labels == sorted(release_labels) - - def test_integrated_data_ordering(self, test_dbs): - """Test that integrated data is returned in correct order by release label.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = StatsGenerator(metadata_uri) - integrated_data = generator.get_integrated_data() - if len(integrated_data) > 1: - release_labels = [item['release'] for item in integrated_data] - assert release_labels == sorted(release_labels) - - -@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / 
"databases/ensembl_genome_metadata"}, - {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, - ]], indirect=True) -class TestFTPMetadataExporter: - """Test suite for FTPMetadataExporter class.""" - - def test_init_valid_uri(self, test_dbs): - """Test FTPMetadataExporter initialization with valid metadata URI.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - assert exporter.metadata_db is not None - - def test_export_to_json_returns_dict(self, test_dbs): - """Test export_to_json returns dictionary when no output file specified.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - result = exporter.export_to_json() - assert isinstance(result, dict) - assert 'last_updated' in result - assert 'species' in result - assert isinstance(result['species'], dict) - - def test_export_to_json_creates_file(self, test_dbs, tmp_path): - """Test export_to_json creates file when output_file is specified.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - output_file = tmp_path / "ftp_metadata.json" - result = exporter.export_to_json(str(output_file)) - assert result is None - assert output_file.exists() - with open(output_file, 'r') as f: - data = json.load(f) - assert 'last_updated' in data - assert 'species' in data - - def test_build_ftp_metadata_json_structure(self, test_dbs): - """Test build_ftp_metadata_json returns correct structure.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - metadata = exporter.build_ftp_metadata_json() - assert isinstance(metadata, dict) - assert 'last_updated' in metadata - assert 'species' in metadata - assert isinstance(metadata['species'], dict) - first_species = next(iter(metadata['species'].values())) - assert 'assemblies' in first_species - assert isinstance(first_species['assemblies'], dict) 
- - def test_load_all_genome_data(self, test_dbs): - """Test _load_all_genome_data returns correct structure.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - with exporter.metadata_db.session_scope() as session: - genome_data = exporter._load_all_genome_data(session) - assert isinstance(genome_data, dict) - - first_genome_uuid = next(iter(genome_data.keys())) - first_genome_data = genome_data[first_genome_uuid] - assert 'genome' in first_genome_data - assert 'datasets' in first_genome_data - assert 'attributes' in first_genome_data - assert 'genebuild_metadata' in first_genome_data - assert isinstance(first_genome_data['datasets'], list) - assert isinstance(first_genome_data['attributes'], dict) - - def test_normalize_species_name(self, test_dbs): - """Test _normalize_species_name correctly normalizes species names.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - assert exporter._normalize_species_name('homo sapiens') == 'homo_sapiens' - assert exporter._normalize_species_name('species.name') == 'species_name' - assert exporter._normalize_species_name('species__name') == 'species_name' - assert exporter._normalize_species_name('species___name') == 'species_name' - assert exporter._normalize_species_name('homo. sapiens') == 'homo_sapiens' - assert exporter._normalize_species_name('homo sapiens') == 'homo_sapiens' - assert exporter._normalize_species_name('') == '' - assert exporter._normalize_species_name('homo_sapiens') == 'homo_sapiens' - assert exporter._normalize_species_name('Homo. 
Sapiens') == 'Homo_Sapiens' - assert exporter._normalize_species_name('homo sapiens') == 'homo_sapiens' - assert exporter._normalize_species_name(' homo sapiens ') == '_homo_sapiens_' - - def test_extract_provider_from_path(self, test_dbs): - """Test _extract_provider_from_path extracts provider correctly.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - genebuild_metadata = { - 'genebuild_source_name': 'Ensembl' - } - assert exporter._extract_provider_from_path(genebuild_metadata) == 'ensembl' - genebuild_metadata = { - 'genebuild_source_name': 'REFSEQ' - } - assert exporter._extract_provider_from_path(genebuild_metadata) == 'refseq' - assert exporter._extract_provider_from_path(None) == 'unknown' - assert exporter._extract_provider_from_path({}) == 'unknown' - - def test_extract_genebuild_release_info(self, test_dbs): - """Test _extract_genebuild_release_info extracts release correctly.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - genebuild_metadata = { - 'last_geneset_update': '2024-01-01' - } - result = exporter._extract_genebuild_release_info(genebuild_metadata) - assert result['release'] == '2024_01' - genebuild_metadata = { - 'last_geneset_update': '2023-12-15' - } - result = exporter._extract_genebuild_release_info(genebuild_metadata) - assert result['release'] == '2023_12' - result = exporter._extract_genebuild_release_info(None) - assert result['release'] == 'unknown' - result = exporter._extract_genebuild_release_info({}) - assert result['release'] == 'unknown' - genebuild_metadata = { - 'last_geneset_update': 'invalid-date' - } - result = exporter._extract_genebuild_release_info(genebuild_metadata) - assert result['release'] == 'unknown' - - def test_extract_release_info_from_ensembl_release(self, test_dbs): - """Test _extract_release_info_from_ensembl_release extracts release correctly.""" - metadata_uri = 
test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - with exporter.metadata_db.session_scope() as session: - genome = session.query(Genome).first() - - if genome: - result = exporter._extract_release_info_from_ensembl_release(genome) - - assert isinstance(result, dict) - assert 'release' in result - has_released = any( - gr.ensembl_release and gr.ensembl_release.status == ReleaseStatus.RELEASED - for gr in genome.genome_releases - ) - if has_released: - assert result['release'] != 'unknown' - - def test_has_released_dataset_bulk(self, test_dbs): - """Test _has_released_dataset_bulk correctly identifies dataset types.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - - datasets = [ - {'dataset_type_name': 'genebuild'}, - {'dataset_type_name': 'assembly'} - ] - assert exporter._has_released_dataset_bulk(datasets, 'genebuild') is True - assert exporter._has_released_dataset_bulk(datasets, 'assembly') is True - assert exporter._has_released_dataset_bulk(datasets, 'variation') is False - datasets = [ - {'dataset_type_name': 'regulatory_features'} - ] - assert exporter._has_released_dataset_bulk(datasets, 'regulation') is True - assert exporter._has_released_dataset_bulk([], 'genebuild') is False - - def test_get_dataset_file_paths_genebuild(self, test_dbs): - """Test _get_dataset_file_paths generates correct file paths for genebuild.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - base_path = "homo_sapiens/GRCh38/ensembl/geneset/2024_01" - with exporter.metadata_db.session_scope() as session: - genome = session.query(Genome).first() - assembly_data = {'accession': 'GRCh38'} if genome else {} - file_paths = exporter._get_dataset_file_paths( - base_path, 'genebuild', genome, assembly_data - ) - - assert 'annotations' in file_paths - assert 'cdna.fa.gz' in file_paths['annotations'] - assert 'genes.gff3.gz' 
in file_paths['annotations'] - assert 'genes.gtf.gz' in file_paths['annotations'] - assert 'pep.fa.gz' in file_paths['annotations'] - assert 'vep' in file_paths - assert 'genes.gff3.bgz' in file_paths['vep'] - - def test_get_dataset_file_paths_assembly(self, test_dbs): - """Test _get_dataset_file_paths generates correct file paths for assembly.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - base_path = "homo_sapiens/GRCh38/genome" - with exporter.metadata_db.session_scope() as session: - genome = session.query(Genome).first() - assembly_data = {'accession': 'GRCh38'} if genome else {} - file_paths = exporter._get_dataset_file_paths( - base_path, 'assembly', genome, assembly_data - ) - assert 'genome_sequences' in file_paths - assert 'chromosomes.tsv.gz' in file_paths['genome_sequences'] - assert 'hardmasked.fa.gz' in file_paths['genome_sequences'] - assert 'softmasked.fa.gz' in file_paths['genome_sequences'] - assert 'unmasked.fa.gz' in file_paths['genome_sequences'] - assert 'vep' in file_paths - assert 'softmasked.fa.bgz' in file_paths['vep'] - - def test_get_dataset_file_paths_variation(self, test_dbs): - """Test _get_dataset_file_paths generates correct file paths for variation.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - base_path = "homo_sapiens/GRCh38/ensembl/variation/2024_01" - with exporter.metadata_db.session_scope() as session: - genome = session.query(Genome).first() - assembly_data = {'accession': 'GRCh38'} if genome else {} - file_paths = exporter._get_dataset_file_paths( - base_path, 'variation', genome, assembly_data - ) - assert 'variation_data' in file_paths - assert 'variation.vcf.gz' in file_paths['variation_data'] - - def test_get_dataset_file_paths_regulation(self, test_dbs): - """Test _get_dataset_file_paths generates correct file paths for regulation.""" - metadata_uri = 
test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - base_path = "homo_sapiens/GRCh38/ensembl/regulation" - with exporter.metadata_db.session_scope() as session: - genome = session.query(Genome).first() - assembly_data = {'accession': 'GRCh38'} if genome else {} - file_paths = exporter._get_dataset_file_paths( - base_path, 'regulation', genome, assembly_data - ) - - assert 'regulatory_features' in file_paths - assert 'regulation.gff' in file_paths['regulatory_features'] - - def test_get_dataset_file_paths_homologies(self, test_dbs): - """Test _get_dataset_file_paths generates correct file paths for homologies.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - base_path = "homo_sapiens/GRCh38/ensembl/homology/2024_01" - with exporter.metadata_db.session_scope() as session: - genome = session.query(Genome).first() - if genome: - assembly_data = {'accession': genome.assembly.accession} - file_paths = exporter._get_dataset_file_paths( - base_path, 'homologies', genome, assembly_data - ) - assert 'homology_data' in file_paths - homology_files = file_paths['homology_data'] - assert len(homology_files) > 0 - first_file = next(iter(homology_files.keys())) - assert 'homology.tsv.gz' in first_file - - def test_export_json_with_actual_data(self, test_dbs): - """Test export generates valid JSON structure with actual database data.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - metadata = exporter.export_to_json() - assert metadata is not None - assert 'last_updated' in metadata - assert 'species' in metadata - - for species_name, species_data in metadata['species'].items(): - assert isinstance(species_name, str) - assert 'assemblies' in species_data - for assembly_name, assembly_data in species_data['assemblies'].items(): - assert isinstance(assembly_name, str) - if 'providers' in assembly_data: - for 
provider_name, provider_data in assembly_data['providers'].items(): - assert isinstance(provider_name, str) - if 'releases' in provider_data: - for release_name, release_data in provider_data['releases'].items(): - assert isinstance(release_name, str) - if 'datasets' in release_data: - assert isinstance(release_data['datasets'], dict) - - def test_export_handles_empty_database(self, test_dbs): - """Test export handles database with no released genomes gracefully.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - metadata = exporter.export_to_json() - assert metadata is not None - assert 'last_updated' in metadata - assert 'species' in metadata - assert isinstance(metadata['species'], dict) - - def test_json_file_is_valid_json(self, test_dbs, tmp_path): - """Test that exported JSON file can be read back and is valid.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - exporter = FTPMetadataExporter(metadata_uri) - output_file = tmp_path / "test_output.json" - exporter.export_to_json(str(output_file)) - - with open(output_file, 'r') as f: - data = json.load(f) - assert 'last_updated' in data - assert 'species' in data - from datetime import datetime - try: - datetime.fromisoformat(data['last_updated']) - except ValueError: - pytest.fail("last_updated is not in valid ISO format") - - -@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / "databases/ensembl_genome_metadata"}, - {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, - ]], indirect=True) -class TestChangelogGenerator: - """Test suite for ChangelogGenerator class.""" - - def test_init_valid_parameters(self, test_dbs): - """Test ChangelogGenerator initialization with valid parameters.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="2024-01" - ) - assert generator.metadata_db is not None - assert 
generator.release_label == "2024-01" - assert generator.output_path is None - - def test_init_with_output_path(self, test_dbs): - """Test ChangelogGenerator initialization with custom output path.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_path = "/tmp/test_changelog.csv" - - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="2024-01", - output_path=output_path - ) - assert generator.metadata_db is not None - assert generator.release_label == "2024-01" - assert generator.output_path == output_path - - def test_init_invalid_metadata_uri_empty(self, test_dbs): - """Test initialization fails with empty metadata URI.""" - with pytest.raises(ValueError) as excinfo: - ChangelogGenerator( - metadata_uri="", - release_label="2024-01" - ) - assert "metadata_uri must be a non-empty string" in str(excinfo.value) - - def test_init_invalid_metadata_uri_none(self, test_dbs): - """Test initialization fails with None metadata URI.""" - with pytest.raises(ValueError) as excinfo: - ChangelogGenerator( - metadata_uri=None, - release_label="2024-01" - ) - assert "metadata_uri must be a non-empty string" in str(excinfo.value) - - def test_init_invalid_metadata_uri_not_string(self, test_dbs): - """Test initialization fails with non-string metadata URI.""" - with pytest.raises(ValueError) as excinfo: - ChangelogGenerator( - metadata_uri=123, - release_label="2024-01" - ) - assert "metadata_uri must be a non-empty string" in str(excinfo.value) - - def test_init_invalid_release_label_empty(self, test_dbs): - """Test initialization fails with empty release label.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - - with pytest.raises(ValueError) as excinfo: - ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="" - ) - assert "release_label must be a non-empty string" in str(excinfo.value) - - def test_init_invalid_release_label_none(self, test_dbs): - """Test initialization fails with None release 
label.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - - with pytest.raises(ValueError) as excinfo: - ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=None - ) - assert "release_label must be a non-empty string" in str(excinfo.value) - - def test_init_invalid_release_label_not_string(self, test_dbs): - """Test initialization fails with non-string release label.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - with pytest.raises(ValueError) as excinfo: - ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=123 - ) - assert "release_label must be a non-empty string" in str(excinfo.value) - - def test_verify_release_exists(self, test_dbs): - """Test verify_release returns correct type for existing release.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: - release = session.query(EnsemblRelease).first() - if release: - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=release.label - ) - release_type = generator.verify_release() - assert release_type in ['partial', 'integrated'] - assert release_type == release.release_type - - def test_verify_release_not_found(self, test_dbs): - """Test verify_release raises error for non-existent release.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="nonexistent-release-99999" - ) - with pytest.raises(ValueError) as excinfo: - generator.verify_release() - assert "Release not found" in str(excinfo.value) - assert "nonexistent-release-99999" in str(excinfo.value) - - def test_gather_partial_data_structure(self, test_dbs): - """Test gather_partial_data returns correct structure.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - - with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: - partial_release = 
session.query(EnsemblRelease).filter( - EnsemblRelease.release_type == 'partial' - ).first() - - if partial_release: - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=partial_release.label - ) - - data = generator.gather_partial_data() - - assert isinstance(data, list) - first_entry = data[0] - required_keys = [ - 'scientific_name', 'common_name', 'assembly_name', - 'assembly_accession', 'annotation_provider', - 'geneset_updated', 'variation_updated', 'regulation_updated' - ] - for key in required_keys: - assert key in first_entry, f"Missing key: {key}" - assert isinstance(first_entry['scientific_name'], str) - assert first_entry['common_name'] is None or isinstance(first_entry['common_name'], str) - assert isinstance(first_entry['assembly_name'], str) - assert isinstance(first_entry['assembly_accession'], str) - assert first_entry['annotation_provider'] is None or isinstance(first_entry['annotation_provider'], - str) - assert isinstance(first_entry['geneset_updated'], int) - assert isinstance(first_entry['variation_updated'], int) - assert isinstance(first_entry['regulation_updated'], int) - assert first_entry['geneset_updated'] in [0, 1] - assert first_entry['variation_updated'] in [0, 1] - assert first_entry['regulation_updated'] in [0, 1] - - def test_gather_integrated_data_structure(self, test_dbs): - """Test gather_integrated_data returns correct structure.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: - integrated_release = session.query(EnsemblRelease).filter( - EnsemblRelease.release_type == 'integrated' - ).first() - if integrated_release: - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=integrated_release.label - ) - data = generator.gather_integrated_data() - assert isinstance(data, list) - if len(data) > 0: - first_entry = data[0] - required_keys = [ - 'scientific_name', 'common_name', 'assembly_name', 
- 'assembly_accession', 'annotation_provider', - 'geneset_updated', 'variation_updated', 'regulation_updated', - 'status' - ] - for key in required_keys: - assert key in first_entry, f"Missing key: {key}" - assert first_entry['status'] in ['New', 'Removed', 'Updated', 'Unchanged'] - - def test_get_annotation_sources_bulk(self, test_dbs): - """Test _get_annotation_sources_bulk retrieves annotation sources.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: - partial_release = session.query(EnsemblRelease).filter( - EnsemblRelease.release_type == 'partial' - ).first() - if partial_release: - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=partial_release.label - ) - from ensembl.production.metadata.api.models import Genome, GenomeDataset - genome_ids = [gr.genome_id for gr in session.query(GenomeDataset.genome_id).filter( - GenomeDataset.release_id == partial_release.release_id - ).distinct().limit(5).all()] - if genome_ids: - annotation_sources = generator._get_annotation_sources_bulk( - session, genome_ids - ) - assert isinstance(annotation_sources, dict) - for genome_id in annotation_sources.keys(): - assert isinstance(genome_id, int) - for source in annotation_sources.values(): - assert source is None or isinstance(source, str) - - def test_get_annotation_sources_bulk_empty_list(self, test_dbs): - """Test _get_annotation_sources_bulk handles empty genome list.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: - partial_release = session.query(EnsemblRelease).filter( - EnsemblRelease.release_type == 'partial' - ).first() - if partial_release: - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=partial_release.label - ) - annotation_sources = generator._get_annotation_sources_bulk(session, []) - assert isinstance(annotation_sources, 
dict) - assert len(annotation_sources) == 0 - - def test_export_to_csv_partial_release(self, test_dbs, tmp_path): - """Test export_to_csv creates file with correct structure for partial release.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_file = tmp_path / "test_changelog.csv" - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="2024-01", - output_path=str(output_file) - ) - - sample_data = [ - { - 'scientific_name': 'homo sapiens', - 'common_name': 'human', - 'assembly_name': 'GRCh38', - 'assembly_accession': 'GCA_000001405.15', - 'annotation_provider': 'Ensembl', - 'geneset_updated': 1, - 'variation_updated': 0, - 'regulation_updated': 1 - } - ] - generator.export_to_csv(sample_data) - - assert output_file.exists() - with open(output_file, 'r') as f: - lines = f.readlines() - assert lines[0].startswith('# Changelog for release') - assert '2024-01' in lines[0] - reader = csv.DictReader(lines[1:]) - rows = list(reader) - assert len(rows) == 1 - assert rows[0]['scientific_name'] == 'homo sapiens' - assert rows[0]['geneset_updated'] == '1' - assert rows[0]['variation_updated'] == '0' - assert 'status' not in rows[0] # Partial releases don't have status - - def test_export_to_csv_integrated_release(self, test_dbs, tmp_path): - """Test export_to_csv creates file with correct structure for integrated release.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_file = tmp_path / "test_changelog_integrated.csv" - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="112", - output_path=str(output_file) - ) - sample_data = [ - { - 'scientific_name': 'homo sapiens', - 'common_name': 'human', - 'assembly_name': 'GRCh38', - 'assembly_accession': 'GCA_000001405.15', - 'annotation_provider': 'Ensembl', - 'geneset_updated': '2024-01', - 'variation_updated': None, - 'regulation_updated': '2024-01', - 'status': 'Updated' - } - ] - generator.export_to_csv(sample_data) - assert 
output_file.exists() - with open(output_file, 'r') as f: - lines = f.readlines() - assert lines[0].startswith('# Changelog for release') - reader = csv.DictReader(lines[1:]) - rows = list(reader) - assert len(rows) == 1 - assert rows[0]['scientific_name'] == 'homo sapiens' - assert rows[0]['status'] == 'Updated' # Integrated releases have status - - def test_export_to_csv_default_output_path(self, test_dbs, tmp_path, monkeypatch): - """Test export_to_csv uses default output path when none specified.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - monkeypatch.chdir(tmp_path) - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="2024-01" - ) - sample_data = [ - { - 'scientific_name': 'test species', - 'common_name': 'test', - 'assembly_name': 'test', - 'assembly_accession': 'test', - 'annotation_provider': 'test', - 'geneset_updated': 0, - 'variation_updated': 0, - 'regulation_updated': 0 - } - ] - generator.export_to_csv(sample_data) - default_file = tmp_path / "2024-01.csv" - assert default_file.exists() - - def test_export_to_csv_empty_data(self, test_dbs, tmp_path): - """Test export_to_csv handles empty data correctly.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_file = tmp_path / "test_empty.csv" - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="2024-01", - output_path=str(output_file) - ) - generator.export_to_csv([]) - assert output_file.exists() - with open(output_file, 'r') as f: - lines = f.readlines() - assert lines[0].startswith('# Changelog for release') - assert len(lines) >= 2 - - def test_export_to_csv_creates_parent_directory(self, test_dbs, tmp_path): - """Test export_to_csv creates parent directories if they don't exist.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_file = tmp_path / "nested" / "directories" / "changelog.csv" - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="2024-01", - 
output_path=str(output_file) - ) - sample_data = [ - { - 'scientific_name': 'test', - 'common_name': 'test', - 'assembly_name': 'test', - 'assembly_accession': 'test', - 'annotation_provider': 'test', - 'geneset_updated': 0, - 'variation_updated': 0, - 'regulation_updated': 0 - } - ] - generator.export_to_csv(sample_data) - assert output_file.exists() - assert output_file.parent.exists() - - def test_generate_partial_release(self, test_dbs, tmp_path): - """Test generate method works end-to-end for partial release.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: - partial_release = session.query(EnsemblRelease).filter( - EnsemblRelease.release_type == 'partial' - ).first() - output_file = tmp_path / "test_generate.csv" - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=partial_release.label, - output_path=str(output_file) - ) - generator.generate() - assert output_file.exists() - - def test_generate_integrated_release(self, test_dbs, tmp_path): - """Test generate method works end-to-end for integrated release.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - - with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: - integrated_release = session.query(EnsemblRelease).filter( - EnsemblRelease.release_type == 'integrated' - ).first() - if integrated_release: - output_file = tmp_path / "test_generate_integrated.csv" - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=integrated_release.label, - output_path=str(output_file) - ) - generator.generate() - assert output_file.exists() - - def test_generate_invalid_release(self, test_dbs, tmp_path): - """Test generate method raises error for invalid release.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - output_file = tmp_path / "test_invalid.csv" - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - 
release_label="invalid-release-999", - output_path=str(output_file) - ) - with pytest.raises(ValueError) as excinfo: - generator.generate() - assert "Release not found" in str(excinfo.value) - - def test_gather_partial_data_no_genomes(self, test_dbs): - """Test gather_partial_data returns empty list when no genomes found.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: - partial_release = session.query(EnsemblRelease).filter( - EnsemblRelease.release_type == 'partial' - ).first() - generator = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label=partial_release.label - ) - - data = generator.gather_partial_data() - assert isinstance(data, list) - - def test_csv_fieldnames_partial_vs_integrated(self, test_dbs, tmp_path): - """Test that CSV has different fieldnames for partial vs integrated releases.""" - metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - - # Partial release data - partial_file = tmp_path / "partial.csv" - generator_partial = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="2024-01", - output_path=str(partial_file) - ) - - partial_data = [{ - 'scientific_name': 'test', 'common_name': 'test', 'assembly_name': 'test', - 'assembly_accession': 'test', 'annotation_provider': 'test', - 'geneset_updated': 0, 'variation_updated': 0, 'regulation_updated': 0 - }] - - generator_partial.export_to_csv(partial_data) - - integrated_file = tmp_path / "integrated.csv" - generator_integrated = ChangelogGenerator( - metadata_uri=metadata_uri, - release_label="112", - output_path=str(integrated_file) - ) - - integrated_data = [{ - 'scientific_name': 'test', 'common_name': 'test', 'assembly_name': 'test', - 'assembly_accession': 'test', 'annotation_provider': 'test', - 'geneset_updated': '2024-01', 'variation_updated': None, 'regulation_updated': None, - 'status': 'New' - }] - - generator_integrated.export_to_csv(integrated_data) - - with 
open(partial_file, 'r') as f: - lines = f.readlines() - header = lines[1].strip() # Skip comment line - assert 'status' not in header - - with open(integrated_file, 'r') as f: - lines = f.readlines() - header = lines[1].strip() - assert 'status' in header diff --git a/src/tests/test_exports_changelog.py b/src/tests/test_exports_changelog.py new file mode 100644 index 00000000..dd9f1d1b --- /dev/null +++ b/src/tests/test_exports_changelog.py @@ -0,0 +1,489 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import csv +from pathlib import Path + +import pytest + +from ensembl.production.metadata.api.exports.changelog_generator import ChangelogGenerator +from ensembl.production.metadata.api.models import EnsemblRelease + +db_directory = Path(__file__).parent / 'databases' +db_directory = db_directory.resolve() + + +@pytest.mark.parametrize( + "test_dbs", + [[{'src': db_directory / "ensembl_genome_metadata"}, {'src': db_directory / "ncbi_taxonomy"}]], + indirect=True, +) +class TestChangelogGenerator: + """Test suite for ChangelogGenerator class.""" + + def test_init_valid_parameters(self, test_dbs): + """Test ChangelogGenerator initialization with valid parameters.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01" + ) + assert generator.metadata_db is not None + assert generator.release_label == "2024-01" + assert generator.output_path is None + + def test_init_with_output_path(self, test_dbs): + """Test ChangelogGenerator initialization with custom output path.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = "/tmp/test_changelog.csv" + + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=output_path + ) + assert generator.metadata_db is not None + assert generator.release_label == "2024-01" + assert generator.output_path == output_path + + def test_init_invalid_metadata_uri_empty(self, test_dbs): + """Test initialization fails with empty metadata URI.""" + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri="", + release_label="2024-01" + ) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_metadata_uri_none(self, test_dbs): + """Test initialization fails with None metadata URI.""" + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=None, + release_label="2024-01" + ) + assert 
"metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_metadata_uri_not_string(self, test_dbs): + """Test initialization fails with non-string metadata URI.""" + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=123, + release_label="2024-01" + ) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_release_label_empty(self, test_dbs): + """Test initialization fails with empty release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="" + ) + assert "release_label must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_release_label_none(self, test_dbs): + """Test initialization fails with None release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=None + ) + assert "release_label must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_release_label_not_string(self, test_dbs): + """Test initialization fails with non-string release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with pytest.raises(ValueError) as excinfo: + ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=123 + ) + assert "release_label must be a non-empty string" in str(excinfo.value) + + def test_verify_release_exists(self, test_dbs): + """Test verify_release returns correct type for existing release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + release = session.query(EnsemblRelease).first() + if release: + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=release.label + ) + release_type = generator.verify_release() + 
assert release_type in ['partial', 'integrated'] + assert release_type == release.release_type + + def test_verify_release_not_found(self, test_dbs): + """Test verify_release raises error for non-existent release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="nonexistent-release-99999" + ) + with pytest.raises(ValueError) as excinfo: + generator.verify_release() + assert "Release not found" in str(excinfo.value) + assert "nonexistent-release-99999" in str(excinfo.value) + + def test_gather_partial_data_structure(self, test_dbs): + """Test gather_partial_data returns correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label + ) + + data = generator.gather_partial_data() + + assert isinstance(data, list) + first_entry = data[0] + required_keys = [ + 'scientific_name', 'common_name', 'assembly_name', + 'assembly_accession', 'annotation_provider', + 'geneset_updated', 'variation_updated', 'regulation_updated' + ] + for key in required_keys: + assert key in first_entry, f"Missing key: {key}" + assert isinstance(first_entry['scientific_name'], str) + assert first_entry['common_name'] is None or isinstance(first_entry['common_name'], str) + assert isinstance(first_entry['assembly_name'], str) + assert isinstance(first_entry['assembly_accession'], str) + assert first_entry['annotation_provider'] is None or isinstance(first_entry['annotation_provider'], + str) + assert isinstance(first_entry['geneset_updated'], int) + assert isinstance(first_entry['variation_updated'], int) + assert isinstance(first_entry['regulation_updated'], int) + assert 
first_entry['geneset_updated'] in [0, 1] + assert first_entry['variation_updated'] in [0, 1] + assert first_entry['regulation_updated'] in [0, 1] + + def test_gather_integrated_data_structure(self, test_dbs): + """Test gather_integrated_data returns correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + integrated_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'integrated' + ).first() + if integrated_release: + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=integrated_release.label + ) + data = generator.gather_integrated_data() + assert isinstance(data, list) + if len(data) > 0: + first_entry = data[0] + required_keys = [ + 'scientific_name', 'common_name', 'assembly_name', + 'assembly_accession', 'annotation_provider', + 'geneset_updated', 'variation_updated', 'regulation_updated', + 'status' + ] + for key in required_keys: + assert key in first_entry, f"Missing key: {key}" + assert first_entry['status'] in ['New', 'Removed', 'Updated', 'Unchanged'] + + def test_get_annotation_sources_bulk(self, test_dbs): + """Test _get_annotation_sources_bulk retrieves annotation sources.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label + ) + from ensembl.production.metadata.api.models import GenomeDataset + genome_ids = [gr.genome_id for gr in session.query(GenomeDataset.genome_id).filter( + GenomeDataset.release_id == partial_release.release_id + ).distinct().limit(5).all()] + if genome_ids: + annotation_sources = generator._get_annotation_sources_bulk( + session, genome_ids + ) + assert 
isinstance(annotation_sources, dict) + for genome_id in annotation_sources.keys(): + assert isinstance(genome_id, int) + for source in annotation_sources.values(): + assert source is None or isinstance(source, str) + + def test_get_annotation_sources_bulk_empty_list(self, test_dbs): + """Test _get_annotation_sources_bulk handles empty genome list.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + if partial_release: + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label + ) + annotation_sources = generator._get_annotation_sources_bulk(session, []) + assert isinstance(annotation_sources, dict) + assert len(annotation_sources) == 0 + + def test_export_to_csv_partial_release(self, test_dbs, tmp_path): + """Test export_to_csv creates file with correct structure for partial release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "test_changelog.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=str(output_file) + ) + + sample_data = [ + { + 'scientific_name': 'homo sapiens', + 'common_name': 'human', + 'assembly_name': 'GRCh38', + 'assembly_accession': 'GCA_000001405.15', + 'annotation_provider': 'Ensembl', + 'geneset_updated': 1, + 'variation_updated': 0, + 'regulation_updated': 1 + } + ] + generator.export_to_csv(sample_data) + + assert output_file.exists() + with open(output_file, 'r') as f: + lines = f.readlines() + assert lines[0].startswith('# Changelog for release') + assert '2024-01' in lines[0] + reader = csv.DictReader(lines[1:]) + rows = list(reader) + assert len(rows) == 1 + assert rows[0]['scientific_name'] == 'homo sapiens' + assert rows[0]['geneset_updated'] == '1' + assert rows[0]['variation_updated'] == '0' 
+ assert 'status' not in rows[0] # Partial releases don't have status + + def test_export_to_csv_integrated_release(self, test_dbs, tmp_path): + """Test export_to_csv creates file with correct structure for integrated release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "test_changelog_integrated.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="112", + output_path=str(output_file) + ) + sample_data = [ + { + 'scientific_name': 'homo sapiens', + 'common_name': 'human', + 'assembly_name': 'GRCh38', + 'assembly_accession': 'GCA_000001405.15', + 'annotation_provider': 'Ensembl', + 'geneset_updated': '2024-01', + 'variation_updated': None, + 'regulation_updated': '2024-01', + 'status': 'Updated' + } + ] + generator.export_to_csv(sample_data) + assert output_file.exists() + with open(output_file, 'r') as f: + lines = f.readlines() + assert lines[0].startswith('# Changelog for release') + reader = csv.DictReader(lines[1:]) + rows = list(reader) + assert len(rows) == 1 + assert rows[0]['scientific_name'] == 'homo sapiens' + assert rows[0]['status'] == 'Updated' # Integrated releases have status + + def test_export_to_csv_default_output_path(self, test_dbs, tmp_path, monkeypatch): + """Test export_to_csv uses default output path when none specified.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + monkeypatch.chdir(tmp_path) + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01" + ) + sample_data = [ + { + 'scientific_name': 'test species', + 'common_name': 'test', + 'assembly_name': 'test', + 'assembly_accession': 'test', + 'annotation_provider': 'test', + 'geneset_updated': 0, + 'variation_updated': 0, + 'regulation_updated': 0 + } + ] + generator.export_to_csv(sample_data) + default_file = tmp_path / "2024-01.csv" + assert default_file.exists() + + def test_export_to_csv_empty_data(self, test_dbs, tmp_path): + """Test export_to_csv handles 
empty data correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "test_empty.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=str(output_file) + ) + generator.export_to_csv([]) + assert output_file.exists() + with open(output_file, 'r') as f: + lines = f.readlines() + assert lines[0].startswith('# Changelog for release') + assert len(lines) >= 2 + + def test_export_to_csv_creates_parent_directory(self, test_dbs, tmp_path): + """Test export_to_csv creates parent directories if they don't exist.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "nested" / "directories" / "changelog.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=str(output_file) + ) + sample_data = [ + { + 'scientific_name': 'test', + 'common_name': 'test', + 'assembly_name': 'test', + 'assembly_accession': 'test', + 'annotation_provider': 'test', + 'geneset_updated': 0, + 'variation_updated': 0, + 'regulation_updated': 0 + } + ] + generator.export_to_csv(sample_data) + assert output_file.exists() + assert output_file.parent.exists() + + def test_generate_partial_release(self, test_dbs, tmp_path): + """Test generate method works end-to-end for partial release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + output_file = tmp_path / "test_generate.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label, + output_path=str(output_file) + ) + generator.generate() + assert output_file.exists() + + def test_generate_integrated_release(self, test_dbs, tmp_path): + """Test generate method works end-to-end for integrated release.""" + metadata_uri = 
test_dbs['ensembl_genome_metadata'].dbc.url + + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + integrated_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'integrated' + ).first() + if integrated_release: + output_file = tmp_path / "test_generate_integrated.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=integrated_release.label, + output_path=str(output_file) + ) + generator.generate() + assert output_file.exists() + + def test_generate_invalid_release(self, test_dbs, tmp_path): + """Test generate method raises error for invalid release.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_file = tmp_path / "test_invalid.csv" + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="invalid-release-999", + output_path=str(output_file) + ) + with pytest.raises(ValueError) as excinfo: + generator.generate() + assert "Release not found" in str(excinfo.value) + + def test_gather_partial_data_no_genomes(self, test_dbs): + """Test gather_partial_data returns empty list when no genomes found.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + with test_dbs['ensembl_genome_metadata'].dbc.session_scope() as session: + partial_release = session.query(EnsemblRelease).filter( + EnsemblRelease.release_type == 'partial' + ).first() + generator = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label=partial_release.label + ) + + data = generator.gather_partial_data() + assert isinstance(data, list) + + def test_csv_fieldnames_partial_vs_integrated(self, test_dbs, tmp_path): + """Test that CSV has different fieldnames for partial vs integrated releases.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + + # Partial release data + partial_file = tmp_path / "partial.csv" + generator_partial = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="2024-01", + output_path=str(partial_file) + ) + + 
partial_data = [{ + 'scientific_name': 'test', 'common_name': 'test', 'assembly_name': 'test', + 'assembly_accession': 'test', 'annotation_provider': 'test', + 'geneset_updated': 0, 'variation_updated': 0, 'regulation_updated': 0 + }] + + generator_partial.export_to_csv(partial_data) + + integrated_file = tmp_path / "integrated.csv" + generator_integrated = ChangelogGenerator( + metadata_uri=metadata_uri, + release_label="112", + output_path=str(integrated_file) + ) + + integrated_data = [{ + 'scientific_name': 'test', 'common_name': 'test', 'assembly_name': 'test', + 'assembly_accession': 'test', 'annotation_provider': 'test', + 'geneset_updated': '2024-01', 'variation_updated': None, 'regulation_updated': None, + 'status': 'New' + }] + + generator_integrated.export_to_csv(integrated_data) + + with open(partial_file, 'r') as f: + lines = f.readlines() + header = lines[1].strip() # Skip comment line + assert 'status' not in header + + with open(integrated_file, 'r') as f: + lines = f.readlines() + header = lines[1].strip() + assert 'status' in header diff --git a/src/tests/test_exports_json.py b/src/tests/test_exports_json.py new file mode 100644 index 00000000..5b65a859 --- /dev/null +++ b/src/tests/test_exports_json.py @@ -0,0 +1,321 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +from datetime import datetime +from pathlib import Path + +import pytest + +from ensembl.production.metadata.api.exports.ftp_index import FTPMetadataExporter +from ensembl.production.metadata.api.models import Genome, ReleaseStatus + +db_directory = Path(__file__).parent / 'databases' +db_directory = db_directory.resolve() + + +@pytest.mark.parametrize( + "test_dbs", + [[{'src': db_directory / "ensembl_genome_metadata"}, {'src': db_directory / "ncbi_taxonomy"}]], + indirect=True, +) +class TestFTPMetadataExporter: + """Test suite for FTPMetadataExporter class.""" + + def test_init_valid_uri(self, test_dbs): + """Test FTPMetadataExporter initialization with valid metadata URI.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + assert exporter.metadata_db is not None + + def test_export_to_json_returns_dict(self, test_dbs): + """Test export_to_json returns dictionary when no output file specified.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + result = exporter.export_to_json() + assert isinstance(result, dict) + assert 'last_updated' in result + assert 'species' in result + assert isinstance(result['species'], dict) + + def test_export_to_json_creates_file(self, test_dbs, tmp_path): + """Test export_to_json creates file when output_file is specified.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + output_file = tmp_path / "ftp_metadata.json" + result = exporter.export_to_json(str(output_file)) + assert result is None + assert output_file.exists() + with open(output_file, 'r') as f: + data = json.load(f) + assert 'last_updated' in data + assert 'species' in data + + def test_build_ftp_metadata_json_structure(self, test_dbs): + """Test build_ftp_metadata_json returns correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = 
FTPMetadataExporter(metadata_uri) + metadata = exporter.build_ftp_metadata_json() + assert isinstance(metadata, dict) + assert 'last_updated' in metadata + assert 'species' in metadata + assert isinstance(metadata['species'], dict) + first_species = next(iter(metadata['species'].values())) + assert 'assemblies' in first_species + assert isinstance(first_species['assemblies'], dict) + + def test_load_all_genome_data(self, test_dbs): + """Test _load_all_genome_data returns correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + with exporter.metadata_db.session_scope() as session: + genome_data = exporter._load_all_genome_data(session) + assert isinstance(genome_data, dict) + + first_genome_uuid = next(iter(genome_data.keys())) + first_genome_data = genome_data[first_genome_uuid] + assert 'genome' in first_genome_data + assert 'datasets' in first_genome_data + assert 'attributes' in first_genome_data + assert 'genebuild_metadata' in first_genome_data + assert isinstance(first_genome_data['datasets'], list) + assert isinstance(first_genome_data['attributes'], dict) + + @pytest.mark.parametrize( + ("input_name", "expected_name"), + [ + ("homo sapiens", "homo_sapiens"), + ("species.name", "species_name"), + ("species__name", "species_name"), + ("species___name", "species_name"), + ("homo. sapiens", "homo_sapiens"), + ("homo sapiens", "homo_sapiens"), + ("", ""), + ("homo_sapiens", "homo_sapiens"), + ("Homo. 
Sapiens", "Homo_Sapiens"), + ("homo sapiens", "homo_sapiens"), + (" homo sapiens ", "_homo_sapiens_"), + ], + ) + def test_normalize_species_name(self, test_dbs, input_name, expected_name): + """Test _normalize_species_name correctly normalizes species names.""" + metadata_uri = test_dbs["ensembl_genome_metadata"].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + assert exporter._normalize_species_name(input_name) == expected_name + + def test_extract_provider_from_path(self, test_dbs): + """Test _extract_provider_from_path extracts provider correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + genebuild_metadata = { + 'genebuild_source_name': 'Ensembl' + } + assert exporter._extract_provider_from_path(genebuild_metadata) == 'ensembl' + genebuild_metadata = { + 'genebuild_source_name': 'REFSEQ' + } + assert exporter._extract_provider_from_path(genebuild_metadata) == 'refseq' + assert exporter._extract_provider_from_path(None) == 'unknown' + assert exporter._extract_provider_from_path({}) == 'unknown' + + def test_extract_genebuild_release_info(self, test_dbs): + """Test _extract_genebuild_release_info extracts release correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + genebuild_metadata = { + 'last_geneset_update': '2024-01-01' + } + result = exporter._extract_genebuild_release_info(genebuild_metadata) + assert result['release'] == '2024_01' + genebuild_metadata = { + 'last_geneset_update': '2023-12-15' + } + result = exporter._extract_genebuild_release_info(genebuild_metadata) + assert result['release'] == '2023_12' + result = exporter._extract_genebuild_release_info(None) + assert result['release'] == 'unknown' + result = exporter._extract_genebuild_release_info({}) + assert result['release'] == 'unknown' + genebuild_metadata = { + 'last_geneset_update': 'invalid-date' + } + result = 
exporter._extract_genebuild_release_info(genebuild_metadata) + assert result['release'] == 'unknown' + + def test_extract_release_info_from_ensembl_release(self, test_dbs): + """Test _extract_release_info_from_ensembl_release extracts release correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + + if genome: + result = exporter._extract_release_info_from_ensembl_release(genome) + + assert isinstance(result, dict) + assert 'release' in result + has_released = any( + gr.ensembl_release and gr.ensembl_release.status == ReleaseStatus.RELEASED + for gr in genome.genome_releases + ) + if has_released: + assert result['release'] != 'unknown' + + def test_has_released_dataset_bulk(self, test_dbs): + """Test _has_released_dataset_bulk correctly identifies dataset types.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + + datasets = [ + {'dataset_type_name': 'genebuild'}, + {'dataset_type_name': 'assembly'} + ] + assert exporter._has_released_dataset_bulk(datasets, 'genebuild') is True + assert exporter._has_released_dataset_bulk(datasets, 'assembly') is True + assert exporter._has_released_dataset_bulk(datasets, 'variation') is False + datasets = [ + {'dataset_type_name': 'regulatory_features'} + ] + assert exporter._has_released_dataset_bulk(datasets, 'regulation') is True + assert exporter._has_released_dataset_bulk([], 'genebuild') is False + + def test_get_dataset_file_paths_genebuild(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for genebuild.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/ensembl/geneset/2024_01" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + 
assembly_data = {'accession': 'GRCh38'} if genome else {} + file_paths = exporter._get_dataset_file_paths( + base_path, 'genebuild', genome, assembly_data + ) + + assert 'annotations' in file_paths + assert 'cdna.fa.gz' in file_paths['annotations'] + assert 'genes.gff3.gz' in file_paths['annotations'] + assert 'genes.gtf.gz' in file_paths['annotations'] + assert 'pep.fa.gz' in file_paths['annotations'] + assert 'vep' in file_paths + assert 'genes.gff3.bgz' in file_paths['vep'] + + def test_get_dataset_file_paths_assembly(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for assembly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/genome" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + assembly_data = {'accession': 'GRCh38'} if genome else {} + file_paths = exporter._get_dataset_file_paths( + base_path, 'assembly', genome, assembly_data + ) + assert 'genome_sequences' in file_paths + assert 'chromosomes.tsv.gz' in file_paths['genome_sequences'] + assert 'hardmasked.fa.gz' in file_paths['genome_sequences'] + assert 'softmasked.fa.gz' in file_paths['genome_sequences'] + assert 'unmasked.fa.gz' in file_paths['genome_sequences'] + assert 'vep' in file_paths + assert 'softmasked.fa.bgz' in file_paths['vep'] + + def test_get_dataset_file_paths_variation(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for variation.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/ensembl/variation/2024_01" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + assembly_data = {'accession': 'GRCh38'} if genome else {} + file_paths = exporter._get_dataset_file_paths( + base_path, 'variation', genome, assembly_data + ) + assert 'variation_data' 
in file_paths + assert 'variation.vcf.gz' in file_paths['variation_data'] + + def test_get_dataset_file_paths_regulation(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for regulation.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/ensembl/regulation" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + assembly_data = {'accession': 'GRCh38'} if genome else {} + file_paths = exporter._get_dataset_file_paths( + base_path, 'regulation', genome, assembly_data + ) + + assert 'regulatory_features' in file_paths + assert 'regulation.gff' in file_paths['regulatory_features'] + + def test_get_dataset_file_paths_homologies(self, test_dbs): + """Test _get_dataset_file_paths generates correct file paths for homologies.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + base_path = "homo_sapiens/GRCh38/ensembl/homology/2024_01" + with exporter.metadata_db.session_scope() as session: + genome = session.query(Genome).first() + if genome: + assembly_data = {'accession': genome.assembly.accession} + file_paths = exporter._get_dataset_file_paths( + base_path, 'homologies', genome, assembly_data + ) + assert 'homology_data' in file_paths + homology_files = file_paths['homology_data'] + assert len(homology_files) > 0 + first_file = next(iter(homology_files.keys())) + assert 'homology.tsv.gz' in first_file + + def test_export_json_with_actual_data(self, test_dbs): + """Test export generates valid JSON structure with actual database data.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + metadata = exporter.export_to_json() + assert metadata is not None + assert 'last_updated' in metadata + assert 'species' in metadata + + for species_name, species_data in metadata['species'].items(): + 
assert isinstance(species_name, str) + assert 'assemblies' in species_data + for assembly_name, assembly_data in species_data['assemblies'].items(): + assert isinstance(assembly_name, str) + if 'providers' in assembly_data: + for provider_name, provider_data in assembly_data['providers'].items(): + assert isinstance(provider_name, str) + if 'releases' in provider_data: + for release_name, release_data in provider_data['releases'].items(): + assert isinstance(release_name, str) + if 'datasets' in release_data: + assert isinstance(release_data['datasets'], dict) + + def test_export_handles_empty_database(self, test_dbs): + """Test export handles database with no released genomes gracefully.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + metadata = exporter.export_to_json() + assert metadata is not None + assert 'last_updated' in metadata + assert 'species' in metadata + assert isinstance(metadata['species'], dict) + + def test_json_file_is_valid_json(self, test_dbs, tmp_path): + """Test that exported JSON file can be read back and is valid.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + exporter = FTPMetadataExporter(metadata_uri) + output_file = tmp_path / "test_output.json" + exporter.export_to_json(str(output_file)) + + with open(output_file, 'r') as f: + data = json.load(f) + assert 'last_updated' in data + assert 'species' in data + datetime.fromisoformat(data['last_updated']) diff --git a/src/tests/test_exports_stats.py b/src/tests/test_exports_stats.py new file mode 100644 index 00000000..a28200c3 --- /dev/null +++ b/src/tests/test_exports_stats.py @@ -0,0 +1,342 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import csv +from pathlib import Path + +import pytest + +from ensembl.production.metadata.api.exports.stats_generator import StatsGenerator + +db_directory = Path(__file__).parent / 'databases' +db_directory = db_directory.resolve() + + +@pytest.mark.parametrize( + "test_dbs", + [[{'src': db_directory / "ensembl_genome_metadata"}, {'src': db_directory / "ncbi_taxonomy"}]], + indirect=True, +) +class TestStatsGenerator: + """Test suite for StatsGenerator class.""" + + def test_init_valid_uri(self, test_dbs): + """Test StatsGenerator initialization with valid metadata URI.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + assert generator.metadata_db is not None + assert generator.output_path == Path.cwd() + + def test_init_with_output_path(self, test_dbs, tmp_path): + """Test StatsGenerator initialization with custom output path.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "test_output" + generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + assert generator.metadata_db is not None + assert generator.output_path == output_path + assert output_path.exists() + + def test_init_invalid_uri_empty(self, test_dbs): + """Test StatsGenerator initialization fails with empty URI.""" + with pytest.raises(ValueError) as excinfo: + StatsGenerator("") + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_uri_none(self, test_dbs): + """Test StatsGenerator initialization fails with None URI.""" + with 
pytest.raises(ValueError) as excinfo: + StatsGenerator(None) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_init_invalid_uri_not_string(self, test_dbs): + """Test StatsGenerator initialization fails with non-string URI.""" + with pytest.raises(ValueError) as excinfo: + StatsGenerator(123) + assert "metadata_uri must be a non-empty string" in str(excinfo.value) + + def test_get_partial_data(self, test_dbs): + """Test get_partial_data returns correct structure and values.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + partial_data = generator.get_partial_data() + assert isinstance(partial_data, list) + if len(partial_data) > 0: + first_release = partial_data[0] + required_keys = [ + 'release', 'new_genomes', 'total_genomes', + 'new_assemblies', 'total_assemblies', + 'new_variation_datasets', 'total_variation_datasets', + 'new_regulation_datasets', 'total_regulation_datasets' + ] + for key in required_keys: + assert key in first_release, f"Missing key: {key}" + assert isinstance(first_release['release'], str) + assert isinstance(first_release['new_genomes'], int) + assert isinstance(first_release['total_genomes'], int) + assert isinstance(first_release['new_assemblies'], int) + assert isinstance(first_release['total_assemblies'], int) + assert isinstance(first_release['new_variation_datasets'], int) + assert isinstance(first_release['total_variation_datasets'], int) + assert isinstance(first_release['new_regulation_datasets'], int) + assert isinstance(first_release['total_regulation_datasets'], int) + # Verify cumulative totals are non-decreasing + for i in range(1, len(partial_data)): + assert partial_data[i]['total_genomes'] >= partial_data[i - 1]['total_genomes'] + assert partial_data[i]['total_assemblies'] >= partial_data[i - 1]['total_assemblies'] + assert partial_data[i]['total_variation_datasets'] >= partial_data[i - 1]['total_variation_datasets'] + assert 
partial_data[i]['total_regulation_datasets'] >= partial_data[i - 1]['total_regulation_datasets'] + + assert len(partial_data) == 2 + assert partial_data[0]['release'] == '2020-10-18' + assert partial_data[0]['new_genomes'] == 3 + + def test_get_partial_data_specific_values(self, test_dbs): + """Test get_partial_data returns specific expected values from test database.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + partial_data = generator.get_partial_data() + + assert len(partial_data) == 2 + if len(partial_data) >= 1: + assert partial_data[0]['release'] == '2020-10-18' + assert partial_data[0]['new_genomes'] == 3 + assert partial_data[0]['total_genomes'] == 3 + + def test_get_integrated_data(self, test_dbs): + """Test get_integrated_data returns correct structure and values.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + integrated_data = generator.get_integrated_data() + + assert isinstance(integrated_data, list) + + # Don't actually have any integrated data in the test db. + # TODO: Add some integrated data. 
+ if len(integrated_data) > 0: + first_release = integrated_data[0] + required_keys = [ + 'release', 'genomes', 'assemblies', + 'variation_datasets', 'regulation_datasets' + ] + for key in required_keys: + assert key in first_release, f"Missing key: {key}" + assert isinstance(first_release['release'], str) + assert isinstance(first_release['genomes'], int) + assert isinstance(first_release['assemblies'], int) + assert isinstance(first_release['variation_datasets'], int) + assert isinstance(first_release['regulation_datasets'], int) + + assert len(integrated_data) == 0 + assert integrated_data[0]['release'] == '2025-01' + assert integrated_data[0]['genomes'] == 12 + + # def test_get_integrated_data_specific_values(self, test_dbs): + # """Test get_integrated_data returns specific expected values from test database.""" + # metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + # generator = StatsGenerator(metadata_uri) + # + # integrated_data = generator.get_integrated_data() + # + # assert len(integrated_data) == 2 + # if len(integrated_data) >= 1: + # assert integrated_data[0]['release'] == '112' + # assert integrated_data[0]['genomes'] == 50 + # assert integrated_data[0]['assemblies'] == 45 + + def test_count_datasets(self, test_dbs): + """Test _count_datasets returns correct count for a specific release and dataset type.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + with generator.metadata_db.session_scope() as session: + release_id = 1 + variation_count = generator._count_datasets(session, release_id, 'variation') + assert variation_count == 3 + + regulation_count = generator._count_datasets(session, release_id, 'regulatory_features') + assert regulation_count == 0 + + def test_count_and_get_dataset_ids(self, test_dbs): + """Test _count_and_get_dataset_ids returns correct count and IDs.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + + 
with generator.metadata_db.session_scope() as session: + release_id = 1 + count, dataset_ids = generator._count_and_get_dataset_ids( + session, release_id, 'variation' + ) + + assert isinstance(count, int) + assert isinstance(dataset_ids, set) + assert count == len(dataset_ids) + assert count == 3 + + def test_export_to_csv(self, test_dbs, tmp_path): + """Test export_to_csv creates files with correct structure.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "csv_output" + generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + + # Create sample data + partial_data = [ + { + 'release': 'R1', + 'new_genomes': 10, + 'total_genomes': 10, + 'new_assemblies': 8, + 'total_assemblies': 8, + 'new_variation_datasets': 5, + 'total_variation_datasets': 5, + 'new_regulation_datasets': 3, + 'total_regulation_datasets': 3, + } + ] + + integrated_data = [ + { + 'release': 'R1', + 'genomes': 10, + 'assemblies': 8, + 'variation_datasets': 5, + 'regulation_datasets': 3, + } + ] + + generator.export_to_csv(partial_data, integrated_data) + + partial_file = output_path / 'stats.partial.csv' + integrated_file = output_path / 'stats.integrated.csv' + assert partial_file.exists() + assert integrated_file.exists() + with open(partial_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 1 + assert rows[0]['release'] == 'R1' + assert rows[0]['new_genomes'] == '10' + assert rows[0]['total_genomes'] == '10' + with open(integrated_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 1 + assert rows[0]['release'] == 'R1' + assert rows[0]['genomes'] == '10' + assert rows[0]['assemblies'] == '8' + + def test_export_to_csv_sorting(self, test_dbs, tmp_path): + """Test export_to_csv sorts data by release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "csv_output_sorted" + generator = StatsGenerator(metadata_uri, 
output_path=str(output_path)) + partial_data = [ + {'release': 'R3', 'new_genomes': 30, 'total_genomes': 60, + 'new_assemblies': 20, 'total_assemblies': 50, + 'new_variation_datasets': 10, 'total_variation_datasets': 30, + 'new_regulation_datasets': 5, 'total_regulation_datasets': 15}, + {'release': 'R1', 'new_genomes': 10, 'total_genomes': 10, + 'new_assemblies': 8, 'total_assemblies': 8, + 'new_variation_datasets': 5, 'total_variation_datasets': 5, + 'new_regulation_datasets': 3, 'total_regulation_datasets': 3}, + {'release': 'R2', 'new_genomes': 20, 'total_genomes': 30, + 'new_assemblies': 12, 'total_assemblies': 20, + 'new_variation_datasets': 5, 'total_variation_datasets': 10, + 'new_regulation_datasets': 2, 'total_regulation_datasets': 5}, + ] + + generator.export_to_csv(partial_data, []) + partial_file = output_path / 'stats.partial.csv' + assert partial_file.exists() + + with open(partial_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 3 + assert rows[0]['release'] == 'R1' + assert rows[1]['release'] == 'R2' + assert rows[2]['release'] == 'R3' + + def test_export_to_csv_empty_data(self, test_dbs, tmp_path): + """Test export_to_csv handles empty data correctly.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "csv_output_empty" + generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + generator.export_to_csv([], []) + + partial_file = output_path / 'stats.partial.csv' + integrated_file = output_path / 'stats.integrated.csv' + + assert partial_file.exists() + assert integrated_file.exists() + with open(partial_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 0 + + with open(integrated_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 0 + + def test_generate_integration(self, test_dbs, tmp_path): + """Test generate method integrates all components correctly.""" + metadata_uri = 
test_dbs['ensembl_genome_metadata'].dbc.url + output_path = tmp_path / "generate_output" + generator = StatsGenerator(metadata_uri, output_path=str(output_path)) + generator.generate() + partial_file = output_path / 'stats.partial.csv' + integrated_file = output_path / 'stats.integrated.csv' + + assert partial_file.exists() + assert integrated_file.exists() + with open(partial_file, 'r') as f: + reader = csv.DictReader(f) + assert reader.fieldnames is not None + partial_fieldnames = [ + 'release', 'new_genomes', 'total_genomes', + 'new_assemblies', 'total_assemblies', + 'new_variation_datasets', 'total_variation_datasets', + 'new_regulation_datasets', 'total_regulation_datasets' + ] + assert reader.fieldnames == partial_fieldnames + + with open(integrated_file, 'r') as f: + reader = csv.DictReader(f) + assert reader.fieldnames is not None + integrated_fieldnames = [ + 'release', 'genomes', 'assemblies', + 'variation_datasets', 'regulation_datasets' + ] + assert reader.fieldnames == integrated_fieldnames + + def test_partial_data_ordering(self, test_dbs): + """Test that partial data is returned in correct order by release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + partial_data = generator.get_partial_data() + if len(partial_data) > 1: + release_labels = [item['release'] for item in partial_data] + assert release_labels == sorted(release_labels) + # Assert that partial data exists and is not empty + assert partial_data is not None, "Partial data should not be None" + assert len(partial_data) >= 1, "Partial data should contain at least one item" + + def test_integrated_data_ordering(self, test_dbs): + """Test that integrated data is returned in correct order by release label.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + integrated_data = generator.get_integrated_data() + if len(integrated_data) > 1: + release_labels = 
[item['release'] for item in integrated_data] + assert release_labels == sorted(release_labels) + if len(integrated_data) == 0: + release_labels = [item['release'] for item in integrated_data] + assert release_labels == [] diff --git a/src/tests/test_release_factory.py b/src/tests/test_release_factory.py index 182dd6e5..5ef93665 100644 --- a/src/tests/test_release_factory.py +++ b/src/tests/test_release_factory.py @@ -209,7 +209,7 @@ class TestFactoryUtils: def test_get_genome_sets_by_assembly_and_provider(self, test_dbs) -> None: """ - Test `get_genome_sets_by_assembly_and_provider'. + Test 'get_genome_sets_by_assembly_and_provider'. Pretty bad test. We haven't populated the metadata here with an updated genome so it just returns an empty set. """ metadata_db = DBConnection(test_dbs['ensembl_genome_metadata'].dbc.url) diff --git a/src/tests/test_scripts.py b/src/tests/test_scripts.py index 64b63ac4..206da357 100644 --- a/src/tests/test_scripts.py +++ b/src/tests/test_scripts.py @@ -1,20 +1,22 @@ # See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from collections import namedtuple from unittest.mock import patch +from urllib.parse import urlparse import pytest from ensembl.production.metadata.api.models import Assembly +from ensembl.production.metadata.api.models import OrganismGroup from ensembl.production.metadata.scripts.copy_handover_files import * from ensembl.production.metadata.scripts.create_datasets_json import * from ensembl.production.metadata.scripts.delete_ftp_by_uuid import * @@ -29,11 +31,16 @@ 'organism_group_name', 'genome_uuid', 'release_id', 'remove', 'raise_error' ]) -@pytest.mark.parametrize("test_dbs", [[{'src': Path(__file__).parent / "databases/ensembl_genome_metadata"}, - {'src': Path(__file__).parent / "databases/ncbi_taxonomy"}, - {'src': Path(__file__).parent / "databases/core_1"}, - ]], - indirect=True) + +@pytest.mark.parametrize( + "test_dbs", + [[ + {"src": db_directory / "ensembl_genome_metadata"}, + {'src': db_directory / "ncbi_taxonomy"}, + {'src': db_directory / "core_1"}, + ]], + indirect=True, +) class TestScripts: """Test suite for various metadata scripts.""" @@ -152,7 +159,7 @@ def test_regulation_copy_creates_directory(self, test_dbs, tmp_path): "dataset_attribute": [], "name": "test_regulation", "label": "test_label", - "version": "1.0" + "version": "1.0", } ] @@ -187,7 +194,6 @@ def test_create_organism_group_member(self, test_dbs): with DBConnection(metadata_uri).session_scope() as session: organism = session.query(Organism).first() - from ensembl.production.metadata.api.models import OrganismGroup org_group = session.query(OrganismGroup).first() if organism 
and org_group: existing = session.query(OrganismGroupMember).filter( @@ -246,7 +252,6 @@ def test_duckdb_script_environment_variable(self, test_dbs, monkeypatch): """Test that DuckDB script reads from environment variable.""" test_uri = "mysql://testuser:testpass@testhost:3306/testdb" monkeypatch.setenv('METADATA_DB', test_uri) - from urllib.parse import urlparse db = urlparse(os.environ.get('METADATA_DB')) assert db.hostname == "testhost" assert db.port == 3306 From 29fcba7f63b7e2c52701c6178e79a88b1bbae093 Mon Sep 17 00:00:00 2001 From: danielp Date: Tue, 11 Nov 2025 10:46:41 +0000 Subject: [PATCH 13/14] Minor updates that tests passed for. Then integrated data was added to tests. Lots of failures --- .../metadata/api/factories/datasets.py | 5 +++-- .../production/metadata/api/search/search.py | 4 +++- .../production/metadata/updater/core.py | 17 +++++++++-------- .../databases/ensembl_genome_metadata.db | Bin 262144 -> 262144 bytes 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/ensembl/production/metadata/api/factories/datasets.py b/src/ensembl/production/metadata/api/factories/datasets.py index faa0d058..44802e6f 100644 --- a/src/ensembl/production/metadata/api/factories/datasets.py +++ b/src/ensembl/production/metadata/api/factories/datasets.py @@ -396,9 +396,10 @@ def attach_misc_datasets(self, release_id, session=None, force=False): continue # ✅ Skip updating or inserting for this dataset # Check if GenomeDataset exists for this dataset & genome - genome_dataset = session.query(GenomeDataset).filter( + genome_dataset = session.query(GenomeDataset).join(EnsemblRelease).filter( GenomeDataset.dataset_id == dataset_obj.dataset_id, - GenomeDataset.genome_id == genome_id + GenomeDataset.genome_id == genome_id, + EnsemblRelease.release_type != "integrated" ).one_or_none() if genome_dataset: diff --git a/src/ensembl/production/metadata/api/search/search.py b/src/ensembl/production/metadata/api/search/search.py index f40eb1b3..2d4dcda5 100644 --- 
a/src/ensembl/production/metadata/api/search/search.py +++ b/src/ensembl/production/metadata/api/search/search.py @@ -11,6 +11,7 @@ from typing import Optional, List +from ensembl.utils.database import DBConnection from pydantic import BaseModel from sqlalchemy.orm import Session, joinedload @@ -216,7 +217,8 @@ def extract_genome_data( class GenomeSearchIndexer: """Service for generating genome search documents""" - def __init__(self, session: Session): + def __init__(self, metadata_uri: str): + self.metadata_db = DBConnection(metadata_uri, pool_size=cfg.pool_size, pool_recycle=cfg.pool_recycle) self.session = session self.query_helper = GenomeSearchQueryHelper(session) diff --git a/src/ensembl/production/metadata/updater/core.py b/src/ensembl/production/metadata/updater/core.py index c1f53b73..4195d0f0 100644 --- a/src/ensembl/production/metadata/updater/core.py +++ b/src/ensembl/production/metadata/updater/core.py @@ -469,7 +469,7 @@ def get_or_new_organism(self, species_id, meta_session): scientific_name=self.get_meta_single_meta_key(species_id, "organism.scientific_name"), biosample_id=biosample_id, strain=self.get_meta_single_meta_key(species_id, "organism.strain"), - strain_type=self.get_meta_single_meta_key(species_id, "organism.type"), + strain_type=self.get_meta_single_meta_key(species_id, "organism.strain_type"), scientific_parlance_name=self.get_meta_single_meta_key(species_id, "organism.scientific_parlance_name"), tol_id=tol_id, ) @@ -607,13 +607,7 @@ def get_assembly_sequences(self, species_id, assembly): return assembly_sequences, sequence_aliases - ENA_ACCESSION_PATTERNS = [ - re.compile(r'^[A-Z]{1}[0-9]{5}\.[0-9]+$'), - re.compile(r'^[A-Z]{2}[0-9]{6}\.[0-9]+$'), - re.compile(r'^[A-Z]{2}[0-9]{8}$'), - re.compile(r'^[A-Z]{4}[0-9]{2}S?[0-9]{6,8}$'), - re.compile(r'^[A-Z]{6}[0-9]{2}S?[0-9]{7,9}$'), - ] + def _is_valid_ena_accession(self, identifier): """ Check if an identifier matches ENA sequence identifier rules for annotated sequences. 
@@ -628,6 +622,13 @@ def _is_valid_ena_accession(self, identifier): Returns: bool: True if identifier matches any pattern """ + ENA_ACCESSION_PATTERNS = [ + re.compile(r'^[A-Z]{1}[0-9]{5}\.[0-9]+$'), + re.compile(r'^[A-Z]{2}[0-9]{6}\.[0-9]+$'), + re.compile(r'^[A-Z]{2}[0-9]{8}$'), + re.compile(r'^[A-Z]{4}[0-9]{2}S?[0-9]{6,8}$'), + re.compile(r'^[A-Z]{6}[0-9]{2}S?[0-9]{7,9}$'), + ] return any(pattern.match(identifier) for pattern in ENA_ACCESSION_PATTERNS) def _get_valid_accession(self, seq_region_name, synonyms): diff --git a/src/tests/databases/ensembl_genome_metadata.db b/src/tests/databases/ensembl_genome_metadata.db index 1f8767875b9584c2820d2b625f63438be02a0499..967bc19e6a53d9d979203daf5f48e1f2982f30c5 100644 GIT binary patch delta 941 zcmXZaO-NKx6bJBkKi-_VGs7JXp+If4FC{Hakcx7$)T}AXOvh5ore@M8tQ@K6gBc16 z7m_xmk_17ESrp+%AVCt9JTjpcL2YVPwrG)oQ9+x|eeW)Y_y2M3J@4H6CYj7gCNui8 zNy$J?m04n}J=Z%Hz5jd>$YI0(+TZVQ_7}+Ca#{{gWaeF!#jC+%5Hnlki(Dg4hW`Dy zY?PIjm9HsXf1;}=*3lj5iA5V?r(%(KEIKj$wlcyBWoigYNkx|YCO^yfa#p^SPvxjg z%X>0~O*hFlccEzrN{m@>7dUAKU==1p!T>An5CT|XhXBBGJNUr041*xx&c`S2e+-ak zmrQ_w9XQ;W4-)**$@w758so%ExG8w0T=y&>wzN0Vb6Pl(Yw2vlfJhd*P zDdDj?U1}HxGbud4X8cGG&)V(b5!qX!E5qZ z5nZYeVd;o2l^ZiyYPU-XB8q*e!_GqYLI80{<10tBIdHi{h*k~a!+tFal$d|9pVvC5 zHHX7`tOL$M^NWdS)_A4*g*EzAzovxNb=9y@0UKVi2i&hq-D;!;jkD^&`!vp|O1;Ku zRoUx+T~x2#qj7;_?>Ys%=0Us{ueDq2PQvCc&0F?KLRtg1c51cZxxCg6tycS#wMv8- zkJP)jYdln_(YUQnS*?)@;b*Ku5JG|h%-dXbHYyahI)x=Z%#}`FtnwBo3lnqD$-u#F VRj4co1+X?_RR8eFXTMmr_doemusr|( delta 105 zcmZo@5NK!+m>|ulGf~EwRfj<@+-qaXg8c$)d`B4gPw-FR59e3s`^I- Date: Mon, 24 Nov 2025 14:58:42 +0000 Subject: [PATCH 14/14] Fixed integrated tests for everything but grpc --- .../metadata/api/factories/datasets.py | 12 +++---- src/ensembl/production/metadata/grpc/utils.py | 2 +- .../databases/ensembl_genome_metadata.db | Bin 262144 -> 262144 bytes src/tests/test_exports_stats.py | 34 +++++++++--------- src/tests/test_utils.py | 4 +-- 5 files changed, 24 insertions(+), 28 deletions(-) diff --git 
a/src/ensembl/production/metadata/api/factories/datasets.py b/src/ensembl/production/metadata/api/factories/datasets.py index 44802e6f..66624b01 100644 --- a/src/ensembl/production/metadata/api/factories/datasets.py +++ b/src/ensembl/production/metadata/api/factories/datasets.py @@ -391,22 +391,20 @@ def attach_misc_datasets(self, release_id, session=None, force=False): for child_uuid in child_uuids: dataset_obj = session.query(Dataset).filter(Dataset.dataset_uuid == child_uuid).one() - # Skip if dataset is FAULTY or RELEASED if dataset_obj.status in (DatasetStatus.FAULTY, DatasetStatus.RELEASED): - continue # ✅ Skip updating or inserting for this dataset + continue - # Check if GenomeDataset exists for this dataset & genome - genome_dataset = session.query(GenomeDataset).join(EnsemblRelease).filter( + genome_dataset = session.query(GenomeDataset).outerjoin( + EnsemblRelease, GenomeDataset.release_id == EnsemblRelease.release_id + ).filter( GenomeDataset.dataset_id == dataset_obj.dataset_id, GenomeDataset.genome_id == genome_id, - EnsemblRelease.release_type != "integrated" + (EnsemblRelease.release_type != "integrated") | (GenomeDataset.release_id.is_(None)) ).one_or_none() if genome_dataset: - # ✅ Update release_id even if it was attached to a previous release genome_dataset.release_id = release_id else: - # ✅ If it doesn’t exist, create a new one new_gd = GenomeDataset( genome_id=genome_id, dataset=dataset_obj, diff --git a/src/ensembl/production/metadata/grpc/utils.py b/src/ensembl/production/metadata/grpc/utils.py index f4893de1..0e4acef7 100644 --- a/src/ensembl/production/metadata/grpc/utils.py +++ b/src/ensembl/production/metadata/grpc/utils.py @@ -160,7 +160,7 @@ def create_genome_with_attributes_and_count(db_conn, genome, release_version): ) -def get_genomes_from_assembly_accession_iterator(db_conn, assembly_accession, release_version): +def get_genomes_from_assembly_accession_iterator(db_conn, assembly_accession): if not assembly_accession: 
logger.warning("Missing or Empty Assembly accession field.") return msg_factory.create_genome() diff --git a/src/tests/databases/ensembl_genome_metadata.db b/src/tests/databases/ensembl_genome_metadata.db index 967bc19e6a53d9d979203daf5f48e1f2982f30c5..cbbab0c0070a24a8c0bba67ac487ad5daffb1ca7 100644 GIT binary patch delta 92 zcmZo@5NK!+m>|t)I8nx#)sR6i+-qaXqWuC>82C@{Pv8&dSLgf2cZ6@sWFf!FOFxNFSYF>S=ef2p;AZFUW`W$npApnZg8an_0 delta 82 zcmZo@5NK!+m>|ulKT*b+Ri8nxtaD?^qWuCh82C@{Pv8&dSLgf2cZ6@oWS%zZ9>A&CAFf^||*S`83BM>ufUww`_)DQp^!5dTn diff --git a/src/tests/test_exports_stats.py b/src/tests/test_exports_stats.py index a28200c3..d249555f 100644 --- a/src/tests/test_exports_stats.py +++ b/src/tests/test_exports_stats.py @@ -119,8 +119,6 @@ def test_get_integrated_data(self, test_dbs): assert isinstance(integrated_data, list) - # Don't actually have any integrated data in the test db. - # TODO: Add some integrated data. if len(integrated_data) > 0: first_release = integrated_data[0] required_keys = [ @@ -135,22 +133,22 @@ def test_get_integrated_data(self, test_dbs): assert isinstance(first_release['variation_datasets'], int) assert isinstance(first_release['regulation_datasets'], int) - assert len(integrated_data) == 0 - assert integrated_data[0]['release'] == '2025-01' - assert integrated_data[0]['genomes'] == 12 - - # def test_get_integrated_data_specific_values(self, test_dbs): - # """Test get_integrated_data returns specific expected values from test database.""" - # metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url - # generator = StatsGenerator(metadata_uri) - # - # integrated_data = generator.get_integrated_data() - # - # assert len(integrated_data) == 2 - # if len(integrated_data) >= 1: - # assert integrated_data[0]['release'] == '112' - # assert integrated_data[0]['genomes'] == 50 - # assert integrated_data[0]['assemblies'] == 45 + assert len(integrated_data) == 1 + assert integrated_data[0]['release'] == '2025-07' + assert integrated_data[0]['genomes'] == 
10 + + def test_get_integrated_data_specific_values(self, test_dbs): + """Test get_integrated_data returns specific expected values from test database.""" + metadata_uri = test_dbs['ensembl_genome_metadata'].dbc.url + generator = StatsGenerator(metadata_uri) + + integrated_data = generator.get_integrated_data() + + assert len(integrated_data) == 1 + if len(integrated_data) >= 1: + assert integrated_data[0]['release'] == '2025-07' + assert integrated_data[0]['genomes'] == 10 + assert integrated_data[0]['assemblies'] == 10 def test_count_datasets(self, test_dbs): """Test _count_datasets returns correct count for a specific release and dataset type.""" diff --git a/src/tests/test_utils.py b/src/tests/test_utils.py index 4d088a79..98dab17d 100644 --- a/src/tests/test_utils.py +++ b/src/tests/test_utils.py @@ -90,11 +90,11 @@ def test_get_genomes_from_assembly_accession_iterator(self, genome_conn, allow_u output = [ json.loads(json_format.MessageToJson(response)) for response in utils.get_genomes_from_assembly_accession_iterator( - db_conn=genome_conn, assembly_accession="GCA_000005845.2", release_version=None + db_conn=genome_conn, assembly_accession="GCA_000005845.2" ) ] - assert len(output) == expected_count + assert len(output) == 1 @pytest.mark.parametrize( "assembly_accession, release_version",