diff --git a/.travis.yml b/.travis.yml index a37434e1..b6d95510 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,11 +6,8 @@ python: services: - mysql before_script: - - pip install -r requirements.txt - pip install -r requirements-test.txt - pip install . - - export PYTHONPATH=$PYTHONPATH:$PWD/src - script: - - echo "DB_HOST ENV $DB_HOST" - - coverage run -m pytest --server mysql://travis@127.0.0.1:3306 + - echo "DB_HOST $METADATA_URI $TAXONOMY_URI" + - coverage run -m pytest -c pyproject.toml --server mysql://travis@127.0.0.1:3306 diff --git a/conftest.py b/conftest.py index 917f0e9a..35382af5 100644 --- a/conftest.py +++ b/conftest.py @@ -9,41 +9,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os -from pathlib import Path - -from _pytest.config import Config -import pytest -import sqlalchemy as db - -from ensembl.production.metadata.grpc.adaptors.genome import GenomeAdaptor -from ensembl.production.metadata.grpc.adaptors.release import ReleaseAdaptor - +"""Global pytest configuration for Ensembl Metadata GRPC tests.""" pytest_plugins = ("ensembl.plugins.pytest_unittest",) - - -def pytest_configure(config: Config) -> None: - pytest.dbs_dir = Path(__file__).parent / 'src' / 'ensembl' / 'production' / 'metadata' / 'api' / 'sample' - -@pytest.fixture(scope="class") -def engine(multi_dbs): - os.environ["METADATA_URI"] = multi_dbs["ensembl_metadata"].dbc.url - os.environ["TAXONOMY_URI"] = multi_dbs["ncbi_taxonomy"].dbc.url - yield db.create_engine(multi_dbs["ensembl_metadata"].dbc.url) - - -@pytest.fixture(scope="class") -def genome_db_conn(multi_dbs): - genome_conn = GenomeAdaptor( - metadata_uri=multi_dbs["ensembl_metadata"].dbc.url, - taxonomy_uri=multi_dbs["ncbi_taxonomy"].dbc.url - ) - yield genome_conn - - -@pytest.fixture(scope="class") -def release_db_conn(multi_dbs): - release_conn = ReleaseAdaptor( - 
metadata_uri=multi_dbs["ensembl_metadata"].dbc.url - ) - yield release_conn diff --git a/requirements-test.txt b/requirements-test.txt index 019b6b38..318b9619 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,3 +3,4 @@ pytest pylint mypy coverage[toml] +pytest-grpc diff --git a/requirements.in b/requirements.in index a399c8ff..02963670 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,7 @@ ensembl-py@git+https://github.com/Ensembl/ensembl-py.git@1.2.2 grpcio grpcio-tools +grpcio-reflection sqlalchemy types-pymysql +yagrc \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 27afc60a..69e46759 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile requirements.in # -certifi==2023.11.17 +certifi==2024.2.2 # via requests charset-normalizer==3.3.2 # via requests @@ -16,11 +16,17 @@ exceptiongroup==1.2.0 # via pytest greenlet==3.0.3 # via sqlalchemy -grpcio==1.60.0 +grpcio==1.62.0 # via # -r requirements.in + # grpcio-reflection # grpcio-tools -grpcio-tools==1.60.0 + # yagrc +grpcio-reflection==1.62.0 + # via + # -r requirements.in + # yagrc +grpcio-tools==1.62.0 # via -r requirements.in idna==3.6 # via requests @@ -30,11 +36,14 @@ mysqlclient==2.1.1 # via ensembl-py packaging==23.2 # via pytest -pluggy==1.3.0 +pluggy==1.4.0 # via pytest -protobuf==4.25.2 - # via grpcio-tools -pytest==7.4.4 +protobuf==4.25.3 + # via + # grpcio-reflection + # grpcio-tools + # yagrc +pytest==8.0.1 # via # ensembl-py # pytest-dependency @@ -57,8 +66,10 @@ tomli==2.0.1 # via pytest types-pymysql==1.1.0.1 # via -r requirements.in -urllib3==2.1.0 +urllib3==2.2.1 # via requests +yagrc==1.1.2 + # via -r requirements.in # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/src/ensembl/production/metadata/api/factory.py b/src/ensembl/production/metadata/api/factory.py index e74fcb3a..661453d0 100644 --- a/src/ensembl/production/metadata/api/factory.py +++ 
b/src/ensembl/production/metadata/api/factory.py @@ -40,7 +40,7 @@ def meta_factory(db_uri, metadata_uri, taxonomy_uri,force=False): # Dealing with other versionned databases like mart, ontology,... elif re.match(r'^\w+_?\d*_\d+$', db_url.database): raise Exception("other not implemented yet") - elif re.match(r'^ensembl_accounts|ensembl_archive|ensembl_autocomplete|ensembl_metadata|ensembl_production|' + elif re.match(r'^ensembl_accounts|ensembl_archive|ensembl_autocomplete|ensembl_genome_metadata|ensembl_production|' r'ensembl_stable_ids|ncbi_taxonomy|ontology|website', db_url.database): raise Exception("other not implemented yet") diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/assembly.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/assembly.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly_sequence.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/assembly_sequence.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly_sequence.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/assembly_sequence.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/attribute.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/attribute.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/attribute.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/attribute.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset.txt similarity index 100% rename from 
src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_attribute.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_attribute.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_attribute.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_attribute.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_source.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_source.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_source.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_source.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_type.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_type.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_type.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_type.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_release.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/ensembl_release.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_release.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/ensembl_release.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_site.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/ensembl_site.txt similarity index 100% rename from 
src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_site.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/ensembl_site.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/genome.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_dataset.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome_dataset.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_dataset.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome_dataset.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_release.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome_release.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_release.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome_release.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/organism.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism_group.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism_group.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/organism_group.txt rename to 
src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism_group.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism_group_member.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism_group_member.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/organism_group_member.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism_group_member.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/table.sql similarity index 99% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/table.sql index f8079329..425efbf4 100644 --- a/src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql +++ b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/table.sql @@ -167,7 +167,7 @@ CREATE TABLE genome assembly_id int not null, organism_id int not null, is_best tinyint(1) default 0 not null, - production_name varchar(255) default 'default' not null, + production_name varchar(255) not null, constraint genome_genome_uuid_6b62d0ad_uniq unique (genome_uuid), constraint genome_assembly_id_0a748388_fk_assembly_assembly_id diff --git a/src/ensembl/production/metadata/grpc/adaptors/base.py b/src/ensembl/production/metadata/grpc/adaptors/base.py index 8c9b0612..e88a017d 100644 --- a/src/ensembl/production/metadata/grpc/adaptors/base.py +++ b/src/ensembl/production/metadata/grpc/adaptors/base.py @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from ensembl.database import DBConnection -from ensembl.production.metadata.grpc.config import MetadataConfig as config +from ensembl.production.metadata.grpc.config import MetadataConfig ##Todo: Add in OrganismAdapator. Subfunction fetches all organism in popular group. and # of genomes from distinct assemblies. @@ -20,7 +20,9 @@ class BaseAdaptor: def __init__(self, metadata_uri): - self.metadata_db = DBConnection(metadata_uri, pool_size=config.pool_size, pool_recycle=config.pool_recycle) + self.config = MetadataConfig() + self.metadata_db = DBConnection(metadata_uri, pool_size=self.config.pool_size, + pool_recycle=self.config.pool_recycle) def check_parameter(param): diff --git a/src/ensembl/production/metadata/grpc/adaptors/genome.py b/src/ensembl/production/metadata/grpc/adaptors/genome.py index ccccd50d..5977ff6f 100644 --- a/src/ensembl/production/metadata/grpc/adaptors/genome.py +++ b/src/ensembl/production/metadata/grpc/adaptors/genome.py @@ -28,7 +28,8 @@ class GenomeAdaptor(BaseAdaptor): def __init__(self, metadata_uri: str, taxonomy_uri: str): super().__init__(metadata_uri) - self.taxonomy_db = DBConnection(taxonomy_uri, pool_size=MetadataConfig.pool_size, pool_recycle=MetadataConfig.pool_recycle) + self.taxonomy_db = DBConnection(taxonomy_uri, pool_size=self.config.pool_size, + pool_recycle=self.config.pool_recycle) def fetch_taxonomy_names(self, taxonomy_ids, synonyms=None): diff --git a/src/ensembl/production/metadata/grpc/config.py b/src/ensembl/production/metadata/grpc/config.py index a2831b5b..1c18372c 100644 --- a/src/ensembl/production/metadata/grpc/config.py +++ b/src/ensembl/production/metadata/grpc/config.py @@ -48,10 +48,14 @@ def parse_boolean_var(var): class MetadataConfig: - metadata_uri = os.environ.get("METADATA_URI", f"mysql://ensembl@localhost:3306/ensembl_genome_metadata") - taxon_uri = os.environ.get("TAXONOMY_URI", f"mysql://ensembl@localhost:3306/ncbi_taxonomy") - pool_size = os.environ.get("POOL_SIZE", 20) - max_overflow = 
os.environ.get("MAX_OVERFLOW", 0) - pool_recycle = os.environ.get("POOL_RECYCLE", 50) - allow_unreleased = parse_boolean_var(os.environ.get("ALLOW_UNRELEASED", False)) - debug_mode = parse_boolean_var(os.environ.get("DEBUG", False)) \ No newline at end of file + + def __init__(self): + super().__init__() + self.metadata_uri = os.environ.get("METADATA_URI", + f"mysql://ensembl@localhost:3306/ensembl_genome_metadata") + self.taxon_uri = os.environ.get("TAXONOMY_URI", f"mysql://ensembl@localhost:3306/marco_ncbi_taxonomy") + self.pool_size = os.environ.get("POOL_SIZE", 20) + self.max_overflow = os.environ.get("MAX_OVERFLOW", 0) + self.pool_recycle = os.environ.get("POOL_RECYCLE", 50) + self.allow_unreleased = parse_boolean_var(os.environ.get("ALLOW_UNRELEASED", False)) + self.debug_mode = parse_boolean_var(os.environ.get("DEBUG", False)) diff --git a/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py b/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py index 4d4b1196..fb2eab1d 100755 --- a/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py +++ b/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: ensembl/production/metadata/grpc/ensembl_metadata.proto +# source: ensembl/production/metadata/grpc/ensembl_genome_metadata.proto # Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index 4c853211..684e3405 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -13,30 +13,39 @@ import grpc import logging -from ensembl.production.metadata.grpc.config import MetadataConfig as cfg -from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc +from grpc_reflection.v1alpha import reflection + +from ensembl.production.metadata.grpc.config import MetadataConfig +from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc, ensembl_metadata_pb2 from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer logger = logging.getLogger(__name__) -# Determine the logging level based on the value of cfg.debug_mode -log_level = logging.DEBUG if cfg.debug_mode else logging.WARNING - -logging.basicConfig( - level=log_level, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) - def serve(): + cfg = MetadataConfig() + log_level = logging.DEBUG if cfg.debug_mode else logging.WARNING + + logging.basicConfig( + level=log_level, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) ensembl_metadata_pb2_grpc.add_EnsemblMetadataServicer_to_server( EnsemblMetadataServicer(), server ) + SERVICE_NAMES = ( + ensembl_metadata_pb2.DESCRIPTOR.services_by_name['EnsemblMetadata'].full_name, + reflection.SERVICE_NAME + ) + reflection.enable_server_reflection(SERVICE_NAMES, server) server.add_insecure_port("[::]:50051") server.start() try: + logger.info(f"Starting GRPC Server from {cfg.metadata_uri}") + 
logger.info(f"DEBUG: {cfg.debug_mode}") server.wait_for_termination() + yield server except KeyboardInterrupt: logger.info("KeyboardInterrupt caught, stopping the server...") server.stop(grace=0) # Immediately stop the server @@ -45,5 +54,4 @@ def serve(): if __name__ == "__main__": logger.info("gRPC server starting on port 50051...") - logger.info(f"DEBUG: {cfg.debug_mode}") serve() diff --git a/src/ensembl/production/metadata/grpc/servicer.py b/src/ensembl/production/metadata/grpc/servicer.py index 1ffdb266..285dae6e 100644 --- a/src/ensembl/production/metadata/grpc/servicer.py +++ b/src/ensembl/production/metadata/grpc/servicer.py @@ -21,6 +21,7 @@ class EnsemblMetadataServicer(ensembl_metadata_pb2_grpc.EnsemblMetadataServicer): def __init__(self): self.db = utils.connect_to_db() + super().__init__() def GetSpeciesInformation(self, request, context): logger.debug(f"Received RPC for GetSpeciesInformation with request: {request}") diff --git a/src/ensembl/production/metadata/grpc/utils.py b/src/ensembl/production/metadata/grpc/utils.py index 3d84087d..718ecb2f 100644 --- a/src/ensembl/production/metadata/grpc/utils.py +++ b/src/ensembl/production/metadata/grpc/utils.py @@ -14,7 +14,7 @@ from ensembl.production.metadata.api.models import Genome from ensembl.production.metadata.grpc import ensembl_metadata_pb2 -from ensembl.production.metadata.grpc.config import MetadataConfig as cfg +from ensembl.production.metadata.grpc.config import MetadataConfig from ensembl.production.metadata.grpc.adaptors.genome import GenomeAdaptor from ensembl.production.metadata.grpc.adaptors.release import ReleaseAdaptor import ensembl.production.metadata.grpc.protobuf_msg_factory as msg_factory @@ -24,8 +24,8 @@ def connect_to_db(): conn = GenomeAdaptor( - metadata_uri=cfg.metadata_uri, - taxonomy_uri=cfg.taxon_uri + metadata_uri=MetadataConfig().metadata_uri, + taxonomy_uri=MetadataConfig().taxon_uri ) return conn @@ -119,6 +119,7 @@ def get_assembly_information(db_conn, 
assembly_uuid): logger.debug("No assembly information was found.") return msg_factory.create_assembly_info() + # TODO: move this function to protobuf_msg_factory.py file def create_genome_with_attributes_and_count(db_conn, genome, release_version): # we fetch attributes related to that genome @@ -147,12 +148,11 @@ def get_genomes_from_assembly_accession_iterator(db_conn, assembly_accession, re if not assembly_accession: logging.warning("Missing or Empty Assembly accession field.") return msg_factory.create_genome() - # TODO: Add try except to the other functions as well try: genome_results = db_conn.fetch_genomes( assembly_accession=assembly_accession, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) except Exception as e: logging.error(f"Error fetching genomes: {e}") @@ -169,10 +169,9 @@ def get_species_information(db_conn, genome_uuid): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_species() - species_results = db_conn.fetch_genomes( genome_uuid=genome_uuid, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(species_results) == 1: tax_id = species_results[0].Organism.taxonomy_id @@ -192,11 +191,10 @@ def get_sub_species_info(db_conn, organism_uuid, group): if not organism_uuid: logger.warning("Missing or Empty Organism UUID field.") return msg_factory.create_sub_species() - sub_species_results = db_conn.fetch_genomes( organism_uuid=organism_uuid, group=group, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) species_name = [] @@ -229,7 +227,7 @@ def get_genome_uuid(db_conn, production_name, assembly_name, use_default=False): production_name=production_name, assembly_name=assembly_name, use_default_assembly=use_default, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(genome_uuid_result) == 1: @@ -250,11 +248,10 @@ def 
get_genome_by_uuid(db_conn, genome_uuid, release_version): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_genome() - genome_results = db_conn.fetch_genomes( genome_uuid=genome_uuid, release_version=release_version, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(genome_results) == 1: @@ -305,12 +302,11 @@ def get_genome_by_name(db_conn, ensembl_name, site_name, release_version): if not ensembl_name and not site_name: logger.warning("Missing or Empty ensembl_name and site_name field.") return msg_factory.create_genome() - genome_results = db_conn.fetch_genomes( ensembl_name=ensembl_name, site_name=site_name, release_version=release_version, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(genome_results) == 1: response_data = create_genome_with_attributes_and_count( @@ -330,13 +326,12 @@ def get_datasets_list_by_uuid(db_conn, genome_uuid, release_version): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_datasets() - datasets_results = db_conn.fetch_genome_datasets( genome_uuid=genome_uuid, # fetch all datasets, default is 'assembly' only dataset_type_name="all", release_version=release_version, - allow_unreleased=cfg.allow_unreleased, + allow_unreleased=MetadataConfig().allow_unreleased, dataset_attributes=True ) @@ -422,7 +417,7 @@ def genome_assembly_sequence_region(db_conn, genome_uuid, sequence_region_name): def release_iterator(metadata_db, site_name, release_version, current_only): - conn = ReleaseAdaptor(metadata_uri=cfg.metadata_uri) + conn = ReleaseAdaptor(metadata_uri=MetadataConfig().metadata_uri) # set release_version/site_name to None if it's an empty list release_version = release_version or None @@ -435,7 +430,8 @@ def release_iterator(metadata_db, site_name, release_version, current_only): ) for result in release_results: - 
logging.debug(f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") + logging.debug( + f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") yield msg_factory.create_release(result) @@ -443,13 +439,14 @@ def release_by_uuid_iterator(metadata_db, genome_uuid): if not genome_uuid: return - conn = ReleaseAdaptor(metadata_uri=cfg.metadata_uri) + conn = ReleaseAdaptor(metadata_uri=MetadataConfig().metadata_uri) release_results = conn.fetch_releases_for_genome( genome_uuid=genome_uuid, ) for result in release_results: - logging.debug(f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") + logging.debug( + f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") yield msg_factory.create_release(result) @@ -482,7 +479,7 @@ def get_genome_uuid_by_tag(db_conn, genome_tag): genome_uuid_result = db_conn.fetch_genomes( genome_tag=genome_tag, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(genome_uuid_result) == 1: @@ -500,7 +497,6 @@ def get_genome_uuid_by_tag(db_conn, genome_tag): def get_ftp_links(db_conn, genome_uuid, dataset_type, release_version): - # Request is sending an empty string '' instead of None when # an input parameter is not supplied by the user if not genome_uuid: diff --git a/src/tests/conftest.py b/src/tests/conftest.py new file mode 100644 index 00000000..43f86692 --- /dev/null +++ b/src/tests/conftest.py @@ -0,0 +1,83 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Necessary fixtures for our GRPC API tests """ +import os +from pathlib import Path + +import pytest +import sqlalchemy as db +from _pytest.config import Config + +from ensembl.production.metadata.grpc.adaptors.genome import GenomeAdaptor +from ensembl.production.metadata.grpc.adaptors.release import ReleaseAdaptor + +from ensembl.production.metadata.grpc import ensembl_metadata_pb2 +from grpc_reflection.v1alpha import reflection + + +def pytest_configure(config: Config) -> None: + # path is relative to src/tests/, not the repo root (the old root conftest used parent / 'src') + pytest.dbs_dir = Path(__file__).parent.parent / 'ensembl' / 'production' / 'metadata' / 'api' / 'sample' + + +@pytest.fixture(scope="module", autouse=True) +def engine(multi_dbs): + os.environ["METADATA_URI"] = multi_dbs["ensembl_genome_metadata"].dbc.url + os.environ["TAXONOMY_URI"] = multi_dbs["ncbi_taxonomy"].dbc.url + yield db.create_engine(multi_dbs["ensembl_genome_metadata"].dbc.url) + + +@pytest.fixture(scope="class") +def genome_db_conn(multi_dbs): + genome_conn = GenomeAdaptor( + metadata_uri=multi_dbs["ensembl_genome_metadata"].dbc.url, + taxonomy_uri=multi_dbs["ncbi_taxonomy"].dbc.url + ) + yield genome_conn + + +@pytest.fixture(scope="class") +def release_db_conn(multi_dbs): + release_conn = ReleaseAdaptor( + metadata_uri=multi_dbs["ensembl_genome_metadata"].dbc.url + ) + yield release_conn + + +@pytest.fixture(scope='module') +def grpc_add_to_server(): + from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import add_EnsemblMetadataServicer_to_server + + return add_EnsemblMetadataServicer_to_server + + +@pytest.fixture(scope='module') +def 
grpc_servicer(multi_dbs, engine): + from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer + return EnsemblMetadataServicer() + + +@pytest.fixture(scope='module') +def grpc_server(_grpc_server, grpc_addr, grpc_add_to_server, grpc_servicer): + grpc_add_to_server(grpc_servicer, _grpc_server) + SERVICE_NAMES = ( + ensembl_metadata_pb2.DESCRIPTOR.services_by_name['EnsemblMetadata'].full_name, + reflection.SERVICE_NAME + ) + reflection.enable_server_reflection(SERVICE_NAMES, _grpc_server) + _grpc_server.add_insecure_port(grpc_addr) + _grpc_server.start() + yield _grpc_server + _grpc_server.stop(grace=None) diff --git a/src/tests/test_api.py b/src/tests/test_api.py index 340372a0..ecdff64d 100644 --- a/src/tests/test_api.py +++ b/src/tests/test_api.py @@ -25,13 +25,13 @@ sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" -# , {'src': 'ncbi_taxonomy'} -@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}]], indirect=True) +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_genome_metadata"}, + {"src": sample_path / "ncbi_taxonomy"}]], indirect=True) class TestApi: dbc = None # type: UnitTestDB def test_get_public_path(self, multi_dbs): - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: genome = session.query(Genome).filter(Genome.genome_uuid == 'a733574a-93e7-11ec-a39d-005056b38ce3').first() paths = genome.get_public_path(dataset_type='all') @@ -50,7 +50,7 @@ def test_get_public_path(self, multi_dbs): # assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/regulation' def test_default_public_path(self, multi_dbs): - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as 
session: # Homo sapien GRCH38 genome = session.query(Genome).filter(Genome.genome_uuid == 'a7335667-93e7-11ec-a39d-005056b38ce3').first() @@ -72,7 +72,7 @@ def test_organism_ensembl_name_compat(self, multi_dbs): """ Validate that we can still yse ensembl_name in queries from SQLAlchemy This test will fail when we remove the ORM column for good """ - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: ensembl_name = session.query(Organism).filter(Organism.ensembl_name == 'SAMN12121739').first() biosample_id = session.query(Organism).filter(Organism.biosample_id == 'SAMN12121739').first() diff --git a/src/tests/test_grpc.py b/src/tests/test_grpc.py index 59941a02..3660b6cd 100644 --- a/src/tests/test_grpc.py +++ b/src/tests/test_grpc.py @@ -24,14 +24,14 @@ sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" logger = logging.getLogger(__name__) -@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}, +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_genome_metadata"}, {"src": sample_path / "ncbi_taxonomy"}]], indirect=True) class TestMetadataDB: dbc = None # type: UnitTestDB def test_load_database(self, multi_dbs): - db_test = ReleaseAdaptor(multi_dbs['ensembl_metadata'].dbc.url) + db_test = ReleaseAdaptor(multi_dbs['ensembl_genome_metadata'].dbc.url) assert db_test, "DB should not be empty" @pytest.mark.parametrize( @@ -48,7 +48,7 @@ def test_load_database(self, multi_dbs): ] ) def test_fetch_all_genomes(self, multi_dbs, allow_unreleased, unreleased_only, current_only, output_count): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( 
allow_unreleased=allow_unreleased, @@ -58,7 +58,7 @@ def test_fetch_all_genomes(self, multi_dbs, allow_unreleased, unreleased_only, c assert len(test) == output_count def test_fetch_with_all_args_no_conflict(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( genome_uuid="a733550b-93e7-11ec-a39d-005056b38ce3", @@ -76,7 +76,7 @@ def test_fetch_with_all_args_no_conflict(self, multi_dbs): assert len(test) == 0 def test_fetch_with_all_args_conflict(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( genome_uuid="a733550b-93e7-11ec-a39d-005056b38ce3", @@ -94,7 +94,7 @@ def test_fetch_with_all_args_conflict(self, multi_dbs): assert len(test) == 0 def test_fetch_releases(self, multi_dbs): - conn = ReleaseAdaptor(multi_dbs['ensembl_metadata'].dbc.url) + conn = ReleaseAdaptor(multi_dbs['ensembl_genome_metadata'].dbc.url) test = conn.fetch_releases(release_id=2) # test the one to many connection assert test[0].EnsemblSite.name == 'Ensembl' @@ -104,36 +104,36 @@ def test_fetch_releases(self, multi_dbs): # currently only have one release, so the testing is not comprehensive def test_fetch_releases_for_genome(self, multi_dbs): - conn = ReleaseAdaptor(multi_dbs['ensembl_metadata'].dbc.url) + conn = ReleaseAdaptor(multi_dbs['ensembl_genome_metadata'].dbc.url) test = conn.fetch_releases_for_genome('ae794660-8751-41cc-8883-b2fcdc7a74e8') assert test[0].EnsemblSite.name == 'Ensembl' def test_fetch_releases_for_dataset(self, multi_dbs): - conn = ReleaseAdaptor(multi_dbs['ensembl_metadata'].dbc.url) + conn = ReleaseAdaptor(multi_dbs['ensembl_genome_metadata'].dbc.url) test = 
conn.fetch_releases_for_dataset('3d653b2d-aa8d-4f7e-8f92-55f57c7cac3a') assert test[0].EnsemblSite.name == 'Ensembl' assert test[0].EnsemblRelease.label == 'beta-1' def test_fetch_taxonomy_names(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_taxonomy_names(taxonomy_ids=[6239, 511145]) assert test[511145]['scientific_name'] == 'Escherichia coli str. K-12 substr. MG1655' def test_fetch_taxonomy_ids(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_taxonomy_ids(taxonomy_names='Caenorhabditis elegans') assert test[0] == 6239 def test_fetch_genomes(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes(genome_uuid='a7335667-93e7-11ec-a39d-005056b38ce3') assert test[0].Organism.scientific_name == 'Homo sapiens' # def test_fetch_genomes_by_group_division(self, multi_dbs): - # conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + # conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, # taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) # division_filter = 'EnsemblVertebrates' # test = conn.fetch_genomes(group=division_filter) @@ -142,25 +142,25 @@ def test_fetch_genomes(self, multi_dbs): # assert division_filter in division_results def test_fetch_genomes_by_genome_uuid(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, 
taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_genome_uuid('b00f5b0a-b434-4949-9c05-140826c96cd4') assert test[0].Organism.scientific_name == 'Oryzias latipes' def test_fetch_genome_by_ensembl_and_assembly_name(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes(assembly_name='NOD_ShiLtJ_v1', ensembl_name='SAMN04489827') assert test[0].Organism.scientific_name == 'Mus musculus' def test_fetch_genomes_by_assembly_accession(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_assembly_accession('GCA_000005845.2') assert test[0].Organism.scientific_name == 'Escherichia coli str. K-12 substr. MG1655 str. 
K12' def test_fetch_genomes_by_assembly_sequence_accession(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences( genome_uuid='a7335667-93e7-11ec-a39d-005056b38ce3', @@ -170,7 +170,7 @@ def test_fetch_genomes_by_assembly_sequence_accession(self, multi_dbs): assert test[0].AssemblySequence.name == 'HG2280_PATCH' def test_fetch_genomes_by_assembly_sequence_accession_empty(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences( genome_uuid='s0m3-r4nd0m-g3n3-uu1d-v4lu3', @@ -180,19 +180,19 @@ def test_fetch_genomes_by_assembly_sequence_accession_empty(self, multi_dbs): assert len(test) == 0 def test_fetch_genomes_by_ensembl_name(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_ensembl_name('SAMN04489826') assert test[0].Organism.scientific_name == 'Mus musculus' def test_fetch_genomes_by_taxonomy_id(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_taxonomy_id(10090) assert test[0].Organism.scientific_name == 'Mus musculus' def test_fetch_genomes_by_scientific_name(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, 
taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_scientific_name( scientific_name='Oryzias latipes', @@ -201,7 +201,7 @@ def test_fetch_genomes_by_scientific_name(self, multi_dbs): assert test[0].Organism.common_name == 'Japanese medaka' def test_fetch_sequences(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences(assembly_uuid='9d6b239c-46dd-4c79-bc29-1089f348d31d') # this test is going to drive me nuts @@ -221,7 +221,7 @@ def test_fetch_sequences(self, multi_dbs): ) def test_fetch_sequences_chromosomal(self, multi_dbs, genome_uuid, assembly_accession, chromosomal_only, expected_output): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences( genome_uuid=genome_uuid, @@ -244,7 +244,7 @@ def test_fetch_sequences_chromosomal(self, multi_dbs, genome_uuid, assembly_acce ) def test_fetch_sequences_by_assembly_seq_name(self, multi_dbs, genome_uuid, assembly_sequence_name, chromosomal_only, expected_output): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences( genome_uuid=genome_uuid, @@ -272,7 +272,7 @@ def test_fetch_genome_dataset_all( dataset_uuid, allow_unreleased, unreleased_only, expected_dataset_uuid, expected_count): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genome_datasets( genome_uuid=genome_uuid, @@ 
-297,7 +297,7 @@ def test_fetch_genome_dataset_all( ] ) def test_fetch_genome_dataset_by_organism_uuid(self, multi_dbs, organism_uuid, expected_count): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genome_datasets( organism_uuid=organism_uuid, @@ -314,7 +314,7 @@ def test_fetch_genome_dataset_by_organism_uuid(self, multi_dbs, organism_uuid, e ] ) def test_fetch_genome_uuid(self, multi_dbs, production_name, assembly_name, use_default_assembly, expected_output): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( production_name=production_name, @@ -335,7 +335,7 @@ def test_fetch_genome_uuid(self, multi_dbs, production_name, assembly_name, use_ ) def test_fetch_genome_uuid_is_current(self, multi_dbs, production_name, assembly_name, use_default_assembly, expected_output): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( production_name=production_name, @@ -354,7 +354,7 @@ def test_fetch_genome_uuid_is_current(self, multi_dbs, production_name, assembly ] ) def test_fetch_genome_uuid_empty(self, multi_dbs, production_name, assembly_name, use_default_assembly): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( production_name=production_name, @@ -372,7 +372,7 @@ def test_fetch_genome_uuid_empty(self, multi_dbs, production_name, assembly_name ) def 
test_fetch_organisms_group_counts(self, multi_dbs, species_taxonomy_id, expected_organism, expected_assemblies_count): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_organisms_group_counts() # When fetching everything: @@ -396,7 +396,7 @@ def test_fetch_organisms_group_counts(self, multi_dbs, species_taxonomy_id, expe ) def test_fetch_related_assemblies_count(self, multi_dbs, organism_uuid, expected_assemblies_count): conn = GenomeAdaptor( - metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url ) @@ -416,7 +416,7 @@ def test_fetch_related_assemblies_count(self, multi_dbs, organism_uuid, expected def test_fetch_genomes_info(self, multi_dbs, allow_unreleased, output_count, expected_genome_uuid): # FIXME This test takes ages, and generate a lot of unitary queries. 
SqlAlchemy results needs review before # moving to 2000 - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_info( allow_unreleased_genomes=allow_unreleased, diff --git a/src/tests/test_protobuf_msg_factory.py b/src/tests/test_protobuf_msg_factory.py index 7442e3c2..1e734a11 100644 --- a/src/tests/test_protobuf_msg_factory.py +++ b/src/tests/test_protobuf_msg_factory.py @@ -26,7 +26,7 @@ sample_path = Path(distribution.location) / "ensembl" / "production" / "metadata" / "api" / "sample" -@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}, +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_genome_metadata"}, {"src": sample_path / "ncbi_taxonomy"}]], indirect=True) class TestClass: diff --git a/src/tests/test_reflection.py b/src/tests/test_reflection.py new file mode 100644 index 00000000..8ac20342 --- /dev/null +++ b/src/tests/test_reflection.py @@ -0,0 +1,60 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" Test Server Reflection discovery """ + +import logging +from pathlib import Path + +import pytest +from google.protobuf.descriptor import MethodDescriptor +from google.protobuf.descriptor_pool import DescriptorPool +from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase +from yagrc import reflector as yagrc_reflector + + +logger = logging.getLogger(__name__) + +sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" + + +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_genome_metadata"}, + {"src": sample_path / "ncbi_taxonomy"}]], + indirect=True) +class TestGRPCReflection: + dbc = None + + def test_services_discovery(self, multi_dbs, grpc_channel, grpc_server): + from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadata + + reflection_db = ProtoReflectionDescriptorDatabase(grpc_channel) + services = reflection_db.get_services() + assert 'ensembl_metadata.EnsemblMetadata' in services + assert 'grpc.reflection.v1alpha.ServerReflection' in services + desc_pool = DescriptorPool(reflection_db) + metadata_service = desc_pool.FindServiceByName('ensembl_metadata.EnsemblMetadata') + method_list = [func for func in dir(EnsemblMetadata) if + callable(getattr(EnsemblMetadata, func)) and not func.startswith("__")] + for method_name in method_list: + method_desc = metadata_service.FindMethodByName(method_name) + assert isinstance(method_desc, MethodDescriptor) + + def test_dynamic_invoke(self, multi_dbs, grpc_channel, grpc_server): + logger.warning("multi dbs %s", multi_dbs) + reflector = yagrc_reflector.GrpcReflectionClient() + reflector.load_protocols(grpc_channel, symbols=["ensembl_metadata.EnsemblMetadata"]) + stub_class = reflector.service_stub_class("ensembl_metadata.EnsemblMetadata") + request_class = reflector.message_class("ensembl_metadata.GenomeUUIDRequest") + stub = stub_class(grpc_channel) + response =
stub.GetGenomeByUUID(request_class(genome_uuid='a73351f7-93e7-11ec-a39d-005056b38ce3', + release_version=None)) + assert response.genome_uuid == 'a73351f7-93e7-11ec-a39d-005056b38ce3' + assert response.assembly.accession == 'GCA_000005845.2' diff --git a/src/tests/test_updater.py b/src/tests/test_updater.py index da6174ec..371b449b 100644 --- a/src/tests/test_updater.py +++ b/src/tests/test_updater.py @@ -30,8 +30,11 @@ db_directory = Path(__file__).parent / 'databases' db_directory = db_directory.resolve() +sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" -@pytest.mark.parametrize("multi_dbs", [[{'src': 'ensembl_metadata'}, {'src': 'ncbi_taxonomy'}, + +@pytest.mark.parametrize("multi_dbs", [[{'src': sample_path / 'ensembl_genome_metadata'}, + {'src': sample_path / 'ncbi_taxonomy'}, {'src': db_directory / 'core_1'}, {'src': db_directory / 'core_2'}, {'src': db_directory / 'core_3'}, {'src': db_directory / 'core_4'}, {'src': db_directory / 'core_5'}, {'src': db_directory / 'core_6'}, @@ -44,7 +47,8 @@ class TestUpdater: dbc = None # type: UnitTestDB def test_new_organism(self, multi_dbs): - test = meta_factory(multi_dbs['core_1'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_1'].dbc.url, + multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() @@ -59,7 +63,7 @@ def test_new_organism(self, multi_dbs): assert inserted_genome_uuid is not None # Look for organism, assembly and geneset - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) # Test the species with metadata_db.session_scope() as session: organism = session.query(Organism).where(Organism.biosample_id == 'Jabberwocky').first() @@ -93,7 +97,7 @@ def test_new_organism(self, multi_dbs): assert sequence3 is not None def test_fail_existing_genome_uuid_no_data(self, multi_dbs): - test = 
meta_factory(multi_dbs['core_2'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_2'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(Exception) as exif: test.process_core() @@ -102,10 +106,10 @@ def test_fail_existing_genome_uuid_no_data(self, multi_dbs): "Please remove it from the meta key and resubmit" in str(exif.value)) def test_update_assembly(self, multi_dbs): - test = meta_factory(multi_dbs['core_3'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_3'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: organism = session.query(Organism).where(Organism.biosample_id == 'Jabberwocky').first() assert organism.scientific_name == 'carol_jabberwocky' @@ -113,10 +117,10 @@ def test_update_assembly(self, multi_dbs): # def test_update_geneset(self, multi_dbs): - test = meta_factory(multi_dbs['core_4'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_4'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: dataset = session.query(Dataset).where( (Dataset.version == "ENS02") & (Dataset.name == 'genebuild') @@ -127,16 +131,16 @@ def test_update_geneset(self, multi_dbs): assert dataset.dataset_type.name == "genebuild" def test_taxonomy_common_name(self, multi_dbs): - test = meta_factory(multi_dbs['core_5'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_5'].dbc.url, 
multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: organism = session.query(Organism).where(Organism.biosample_id == 'test_case_5').first() assert organism.common_name == 'sheep' def test_fail_existing_genome_uuid_data_not_match(self, multi_dbs): - test = meta_factory(multi_dbs['core_6'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_6'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(Exception) as exif: test.process_core() @@ -144,10 +148,10 @@ def test_fail_existing_genome_uuid_data_not_match(self, multi_dbs): "The force flag was not specified so the core was not updated." in str(exif.value)) def test_update_unreleased_no_force(self, multi_dbs): - test = meta_factory(multi_dbs['core_7'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_7'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: # Test that assembly seqs have been updated new_seq = session.query(AssemblySequence).where( @@ -205,7 +209,7 @@ def test_update_unreleased_no_force(self, multi_dbs): assert count > 0 def test_update_released_no_force(self, multi_dbs): - test = meta_factory(multi_dbs['core_8'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_8'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(Exception) as exif: test.process_core() @@ -215,11 +219,11 @@ def 
test_update_released_no_force(self, multi_dbs): exif.value)) def test_update_released_force(self, multi_dbs): - test = meta_factory(multi_dbs['core_9'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_9'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url, force=True) # FIXME Should be run # test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: # Test that assembly seqs have not been updated # new_seq = session.query(AssemblySequence).where( diff --git a/src/tests/test_utils.py b/src/tests/test_utils.py index 630c49cd..d272fba4 100644 --- a/src/tests/test_utils.py +++ b/src/tests/test_utils.py @@ -24,9 +24,12 @@ db_directory = Path(__file__).parent / 'databases' db_directory = db_directory.resolve() +sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" -@pytest.mark.parametrize("multi_dbs", [[{'src': 'ensembl_metadata'}, {'src': 'ncbi_taxonomy'}]], indirect=True) +@pytest.mark.parametrize("multi_dbs", + [[{'src': sample_path / 'ensembl_genome_metadata'}, {'src': sample_path / 'ncbi_taxonomy'}]], + indirect=True) class TestUtils: dbc = None # type: UnitTestDB