From 2c1bb308961d6bc3012a39f4776d86fa61d79e7d Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Wed, 21 Feb 2024 17:00:37 +0000 Subject: [PATCH 01/13] Added server reflection to services + quick tests --- requirements-test.txt | 1 + requirements.in | 1 + requirements.txt | 21 +++-- .../production/metadata/grpc/service.py | 10 ++- .../production/metadata/grpc/servicer.py | 1 + src/tests/test_reflection.py | 79 +++++++++++++++++++ 6 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 src/tests/test_reflection.py diff --git a/requirements-test.txt b/requirements-test.txt index 019b6b38..318b9619 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,3 +3,4 @@ pytest pylint mypy coverage[toml] +pytest-grpc diff --git a/requirements.in b/requirements.in index a399c8ff..53f572ee 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,6 @@ ensembl-py@git+https://github.com/Ensembl/ensembl-py.git@1.2.2 grpcio grpcio-tools +grpcio-reflection sqlalchemy types-pymysql diff --git a/requirements.txt b/requirements.txt index 27afc60a..e7d5f9ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ # # pip-compile requirements.in # -certifi==2023.11.17 +certifi==2024.2.2 # via requests charset-normalizer==3.3.2 # via requests @@ -16,11 +16,14 @@ exceptiongroup==1.2.0 # via pytest greenlet==3.0.3 # via sqlalchemy -grpcio==1.60.0 +grpcio==1.60.1 # via # -r requirements.in + # grpcio-reflection # grpcio-tools -grpcio-tools==1.60.0 +grpcio-reflection==1.60.1 + # via -r requirements.in +grpcio-tools==1.60.1 # via -r requirements.in idna==3.6 # via requests @@ -30,11 +33,13 @@ mysqlclient==2.1.1 # via ensembl-py packaging==23.2 # via pytest -pluggy==1.3.0 +pluggy==1.4.0 # via pytest -protobuf==4.25.2 - # via grpcio-tools -pytest==7.4.4 +protobuf==4.25.3 + # via + # grpcio-reflection + # grpcio-tools +pytest==8.0.1 # via # ensembl-py # pytest-dependency @@ -57,7 +62,7 @@ tomli==2.0.1 # via pytest types-pymysql==1.1.0.1 # via -r requirements.in -urllib3==2.1.0 +urllib3==2.2.1 # via requests # The following packages are considered to be unsafe in a requirements file: diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index 4c853211..f6934393 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -13,8 +13,10 @@ import grpc import logging +from grpc_reflection.v1alpha import reflection + from ensembl.production.metadata.grpc.config import MetadataConfig as cfg -from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc +from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc, ensembl_metadata_pb2 from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer logger = logging.getLogger(__name__) @@ -33,9 +35,15 @@ def serve(): ensembl_metadata_pb2_grpc.add_EnsemblMetadataServicer_to_server( EnsemblMetadataServicer(), server ) + SERVICE_NAMES = ( + ensembl_metadata_pb2.DESCRIPTOR.services_by_name['EnsemblMetadata'].full_name, + reflection.SERVICE_NAME + ) + reflection.enable_server_reflection(SERVICE_NAMES, server) server.add_insecure_port("[::]:50051") server.start() try: + logger.info("Starting GRPC Server") server.wait_for_termination() except KeyboardInterrupt: logger.info("KeyboardInterrupt caught, stopping the server...") diff --git a/src/ensembl/production/metadata/grpc/servicer.py b/src/ensembl/production/metadata/grpc/servicer.py index 1ffdb266..285dae6e 100644 --- a/src/ensembl/production/metadata/grpc/servicer.py +++ b/src/ensembl/production/metadata/grpc/servicer.py @@ -21,6 +21,7 @@ class EnsemblMetadataServicer(ensembl_metadata_pb2_grpc.EnsemblMetadataServicer): def __init__(self): self.db = utils.connect_to_db() + super().__init__() def GetSpeciesInformation(self, request, context): logger.debug(f"Received RPC for GetSpeciesInformation with request: {request}") diff --git a/src/tests/test_reflection.py b/src/tests/test_reflection.py new file mode 100644 index 00000000..a952cf70 --- /dev/null +++ b/src/tests/test_reflection.py @@ -0,0 +1,79 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Test Server Reflection discovery """ + +import logging +from pathlib import Path + +import pytest +from google.protobuf.descriptor import MethodDescriptor +from grpc_reflection.v1alpha import reflection +from google.protobuf.descriptor_pool import DescriptorPool +from ensembl.production.metadata.grpc import ensembl_metadata_pb2 +from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadata + +sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" + +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope='module') +def grpc_add_to_server(): + from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import add_EnsemblMetadataServicer_to_server + + return add_EnsemblMetadataServicer_to_server + + +@pytest.fixture(scope='module') +def grpc_servicer(): + from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer + + return EnsemblMetadataServicer() + + +@pytest.fixture(scope='module') +def grpc_stub_cls(grpc_channel): + from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadataStub + + return EnsemblMetadataStub + + +@pytest.fixture(scope='module') +def grpc_server(_grpc_server, grpc_addr): + SERVICE_NAMES = ( + ensembl_metadata_pb2.DESCRIPTOR.services_by_name['EnsemblMetadata'].full_name, + reflection.SERVICE_NAME + ) + reflection.enable_server_reflection(SERVICE_NAMES, _grpc_server) + # _grpc_server.ad(grpc_addr) + _grpc_server.add_insecure_port(grpc_addr) + _grpc_server.start() + yield _grpc_server + _grpc_server.stop(grace=None) + + +class TestGRPCReflection: + dbc = None + + def test_services_discovery(self, grpc_stub_cls, grpc_channel, grpc_servicer): + from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase + reflection_db = ProtoReflectionDescriptorDatabase(grpc_channel) + services = reflection_db.get_services() + desc_pool = DescriptorPool(reflection_db) + metadata_service = desc_pool.FindServiceByName('ensembl_metadata.EnsemblMetadata') + method_list = [func for func in dir(EnsemblMetadata) if + callable(getattr(EnsemblMetadata, func)) and not func.startswith("__")] + for method_name in method_list: + method_desc = metadata_service.FindMethodByName(method_name) + assert isinstance(method_desc, MethodDescriptor) + assert 'ensembl_metadata.EnsemblMetadata' in services + assert 'grpc.reflection.v1alpha.ServerReflection' in services From 33c621bee0311c75dc60a73704b1ca59471a26c6 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Wed, 21 Feb 2024 18:48:22 +0000 Subject: [PATCH 02/13] Updated test init --- src/ensembl/production/metadata/grpc/service.py | 2 +- src/tests/test_reflection.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index f6934393..23cd69dc 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -43,7 +43,7 @@ def serve(): server.add_insecure_port("[::]:50051") server.start() try: - logger.info("Starting GRPC Server") + logger.info(f"Starting GRPC Server from {cfg.metadata_uri}") server.wait_for_termination() except KeyboardInterrupt: logger.info("KeyboardInterrupt caught, stopping the server...") diff --git a/src/tests/test_reflection.py b/src/tests/test_reflection.py index a952cf70..6e4e0626 100644 --- a/src/tests/test_reflection.py +++ b/src/tests/test_reflection.py @@ -12,17 +12,15 @@ """ Test Server Reflection discovery """ import logging -from pathlib import Path import pytest from google.protobuf.descriptor import MethodDescriptor -from grpc_reflection.v1alpha import reflection from google.protobuf.descriptor_pool import DescriptorPool +from grpc_reflection.v1alpha import reflection + from ensembl.production.metadata.grpc import ensembl_metadata_pb2 from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadata -sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" - logger = logging.getLogger(__name__) @@ -62,7 +60,6 @@ def grpc_server(_grpc_server, grpc_addr): class TestGRPCReflection: - dbc = None def test_services_discovery(self, grpc_stub_cls, grpc_channel, grpc_servicer): from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase @@ -73,6 +70,7 @@ def test_services_discovery(self, grpc_stub_cls, grpc_channel, grpc_servicer): method_list = [func for func in dir(EnsemblMetadata) if callable(getattr(EnsemblMetadata, func)) and not func.startswith("__")] for method_name in method_list: + print(method_name) method_desc = metadata_service.FindMethodByName(method_name) assert isinstance(method_desc, MethodDescriptor) assert 'ensembl_metadata.EnsemblMetadata' in services From 461436070417be9f927200e274ccf0ee6f201bbc Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Wed, 21 Feb 2024 18:55:18 +0000 Subject: [PATCH 03/13] USe DB files even though not actually used. --- src/tests/test_reflection.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/tests/test_reflection.py b/src/tests/test_reflection.py index 6e4e0626..ab636c48 100644 --- a/src/tests/test_reflection.py +++ b/src/tests/test_reflection.py @@ -12,6 +12,7 @@ """ Test Server Reflection discovery """ import logging +from pathlib import Path import pytest from google.protobuf.descriptor import MethodDescriptor @@ -59,9 +60,14 @@ def grpc_server(_grpc_server, grpc_addr): _grpc_server.stop(grace=None) +sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" + +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}, + {"src": sample_path / "ncbi_taxonomy"}]], + indirect=True) class TestGRPCReflection: - def test_services_discovery(self, grpc_stub_cls, grpc_channel, grpc_servicer): + def test_services_discovery(self, grpc_stub_cls, grpc_channel, grpc_servicer, multi_dbs): from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase reflection_db = ProtoReflectionDescriptorDatabase(grpc_channel) services = reflection_db.get_services() From 813c85696033305216fac28ab0a47f37e52a0eb1 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Wed, 21 Feb 2024 19:12:44 +0000 Subject: [PATCH 04/13] USe DB files even though not actually used. --- src/tests/test_reflection.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/tests/test_reflection.py b/src/tests/test_reflection.py index ab636c48..5923891a 100644 --- a/src/tests/test_reflection.py +++ b/src/tests/test_reflection.py @@ -18,6 +18,7 @@ from google.protobuf.descriptor import MethodDescriptor from google.protobuf.descriptor_pool import DescriptorPool from grpc_reflection.v1alpha import reflection +from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase from ensembl.production.metadata.grpc import ensembl_metadata_pb2 from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadata @@ -62,22 +63,31 @@ def grpc_server(_grpc_server, grpc_addr): sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" + @pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}, {"src": sample_path / "ncbi_taxonomy"}]], indirect=True) class TestGRPCReflection: def test_services_discovery(self, grpc_stub_cls, grpc_channel, grpc_servicer, multi_dbs): - from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase reflection_db = ProtoReflectionDescriptorDatabase(grpc_channel) services = reflection_db.get_services() + assert 'ensembl_metadata.EnsemblMetadata' in services + assert 'grpc.reflection.v1alpha.ServerReflection' in services desc_pool = DescriptorPool(reflection_db) metadata_service = desc_pool.FindServiceByName('ensembl_metadata.EnsemblMetadata') method_list = [func for func in dir(EnsemblMetadata) if callable(getattr(EnsemblMetadata, func)) and not func.startswith("__")] for method_name in method_list: - print(method_name) method_desc = metadata_service.FindMethodByName(method_name) assert isinstance(method_desc, MethodDescriptor) - assert 'ensembl_metadata.EnsemblMetadata' in services - assert 'grpc.reflection.v1alpha.ServerReflection' in services + + def test_dynamic_invoke(self, multi_dbs, grpc_channel): + reflection_db = ProtoReflectionDescriptorDatabase(grpc_channel) + services = reflection_db.get_services() + desc_pool = DescriptorPool(reflection_db) + metadata_service = desc_pool.FindServiceByName('ensembl_metadata.EnsemblMetadata') + method_desc = metadata_service.FindMethodByName("GetTopLevelStatisticsByUUID") + assert isinstance(method_desc, MethodDescriptor) + from google.protobuf.message import Message + From 8705bae3af3da82f5c7f1032ebb177c1670ad93f Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 10:24:24 +0000 Subject: [PATCH 05/13] Updated tests and unit tests for reflection --- requirements.txt | 6 +- .../production/metadata/api/factory.py | 2 +- .../assembly.txt | 0 .../assembly_sequence.txt | 0 .../attribute.txt | 0 .../dataset.txt | 0 .../dataset_attribute.txt | 0 .../dataset_source.txt | 0 .../dataset_type.txt | 0 .../ensembl_release.txt | 0 .../ensembl_site.txt | 0 .../genome.txt | 0 .../genome_dataset.txt | 0 .../genome_release.txt | 0 .../organism.txt | 0 .../organism_group.txt | 0 .../organism_group_member.txt | 0 .../table.sql | 2 +- .../production/metadata/grpc/config.py | 6 +- .../metadata/grpc/ensembl_metadata_pb2.py | 2 +- conftest.py => src/tests/conftest.py | 11 ++-- src/tests/test_api.py | 10 +-- src/tests/test_grpc.py | 62 +++++++++---------- src/tests/test_protobuf_msg_factory.py | 2 +- src/tests/test_reflection.py | 31 +++++----- src/tests/test_updater.py | 36 ++++++----- src/tests/test_utils.py | 5 +- 27 files changed, 95 insertions(+), 80 deletions(-) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/assembly.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/assembly_sequence.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/attribute.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/dataset.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/dataset_attribute.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/dataset_source.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/dataset_type.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/ensembl_release.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/ensembl_site.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/genome.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/genome_dataset.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/genome_release.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/organism.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/organism_group.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/organism_group_member.txt (100%) rename src/ensembl/production/metadata/api/sample/{ensembl_metadata => ensembl_genome_metadata}/table.sql (99%) rename conftest.py => src/tests/conftest.py (82%) diff --git a/requirements.txt b/requirements.txt index e7d5f9ea..6f989fe2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,14 +16,14 @@ exceptiongroup==1.2.0 # via pytest greenlet==3.0.3 # via sqlalchemy -grpcio==1.60.1 +grpcio==1.62.0 # via # -r requirements.in # grpcio-reflection # grpcio-tools -grpcio-reflection==1.60.1 +grpcio-reflection==1.62.0 # via -r requirements.in -grpcio-tools==1.60.1 +grpcio-tools==1.62.0 # via -r requirements.in idna==3.6 # via requests diff --git a/src/ensembl/production/metadata/api/factory.py b/src/ensembl/production/metadata/api/factory.py index e74fcb3a..661453d0 100644 --- a/src/ensembl/production/metadata/api/factory.py +++ b/src/ensembl/production/metadata/api/factory.py @@ -40,7 +40,7 @@ def meta_factory(db_uri, metadata_uri, taxonomy_uri,force=False): # Dealing with other versionned databases like mart, ontology,... elif re.match(r'^\w+_?\d*_\d+$', db_url.database): raise Exception("other not implemented yet") - elif re.match(r'^ensembl_accounts|ensembl_archive|ensembl_autocomplete|ensembl_metadata|ensembl_production|' + elif re.match(r'^ensembl_accounts|ensembl_archive|ensembl_autocomplete|ensembl_genome_metadata|ensembl_production|' r'ensembl_stable_ids|ncbi_taxonomy|ontology|website', db_url.database): raise Exception("other not implemented yet") diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/assembly.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/assembly.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly_sequence.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/assembly_sequence.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/assembly_sequence.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/assembly_sequence.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/attribute.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/attribute.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/attribute.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/attribute.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_attribute.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_attribute.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_attribute.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_attribute.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_source.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_source.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_source.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_source.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_type.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_type.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/dataset_type.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/dataset_type.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_release.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/ensembl_release.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_release.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/ensembl_release.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_site.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/ensembl_site.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/ensembl_site.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/ensembl_site.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/genome.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_dataset.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome_dataset.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_dataset.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome_dataset.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_release.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome_release.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/genome_release.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/genome_release.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/organism.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism_group.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism_group.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/organism_group.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism_group.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/organism_group_member.txt b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism_group_member.txt similarity index 100% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/organism_group_member.txt rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/organism_group_member.txt diff --git a/src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/table.sql similarity index 99% rename from src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql rename to src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/table.sql index f8079329..425efbf4 100644 --- a/src/ensembl/production/metadata/api/sample/ensembl_metadata/table.sql +++ b/src/ensembl/production/metadata/api/sample/ensembl_genome_metadata/table.sql @@ -167,7 +167,7 @@ CREATE TABLE genome assembly_id int not null, organism_id int not null, is_best tinyint(1) default 0 not null, - production_name varchar(255) default 'default' not null, + production_name varchar(255) not null, constraint genome_genome_uuid_6b62d0ad_uniq unique (genome_uuid), constraint genome_assembly_id_0a748388_fk_assembly_assembly_id diff --git a/src/ensembl/production/metadata/grpc/config.py b/src/ensembl/production/metadata/grpc/config.py index a2831b5b..9615bece 100644 --- a/src/ensembl/production/metadata/grpc/config.py +++ b/src/ensembl/production/metadata/grpc/config.py @@ -54,4 +54,8 @@ class MetadataConfig: max_overflow = os.environ.get("MAX_OVERFLOW", 0) pool_recycle = os.environ.get("POOL_RECYCLE", 50) allow_unreleased = parse_boolean_var(os.environ.get("ALLOW_UNRELEASED", False)) - debug_mode = parse_boolean_var(os.environ.get("DEBUG", False)) \ No newline at end of file + debug_mode = parse_boolean_var(os.environ.get("DEBUG", False)) + + #@property + #def metadata_uri(self): + # return os.environ.get("METADATA_URI", f"mysql://ensembl@localhost:3306/ensembl_genome_metadata") diff --git a/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py b/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py index 4d4b1196..fb2eab1d 100755 --- a/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py +++ b/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! -# source: ensembl/production/metadata/grpc/ensembl_metadata.proto +# source: ensembl/production/metadata/grpc/ensembl_genome_metadata.proto # Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor diff --git a/conftest.py b/src/tests/conftest.py similarity index 82% rename from conftest.py rename to src/tests/conftest.py index 917f0e9a..41d36a9c 100644 --- a/conftest.py +++ b/src/tests/conftest.py @@ -25,17 +25,18 @@ def pytest_configure(config: Config) -> None: pytest.dbs_dir = Path(__file__).parent / 'src' / 'ensembl' / 'production' / 'metadata' / 'api' / 'sample' -@pytest.fixture(scope="class") + +@pytest.fixture(scope="class", autouse=True) def engine(multi_dbs): - os.environ["METADATA_URI"] = multi_dbs["ensembl_metadata"].dbc.url + os.environ["METADATA_URI"] = multi_dbs["ensembl_genome_metadata"].dbc.url os.environ["TAXONOMY_URI"] = multi_dbs["ncbi_taxonomy"].dbc.url - yield db.create_engine(multi_dbs["ensembl_metadata"].dbc.url) + yield db.create_engine(multi_dbs["ensembl_genome_metadata"].dbc.url) @pytest.fixture(scope="class") def genome_db_conn(multi_dbs): genome_conn = GenomeAdaptor( - metadata_uri=multi_dbs["ensembl_metadata"].dbc.url, + metadata_uri=multi_dbs["ensembl_genome_metadata"].dbc.url, taxonomy_uri=multi_dbs["ncbi_taxonomy"].dbc.url ) yield genome_conn @@ -44,6 +45,6 @@ def genome_db_conn(multi_dbs): @pytest.fixture(scope="class") def release_db_conn(multi_dbs): release_conn = ReleaseAdaptor( - metadata_uri=multi_dbs["ensembl_metadata"].dbc.url + metadata_uri=multi_dbs["ensembl_genome_metadata"].dbc.url ) yield release_conn diff --git a/src/tests/test_api.py b/src/tests/test_api.py index 340372a0..ecdff64d 100644 --- a/src/tests/test_api.py +++ b/src/tests/test_api.py @@ -25,13 +25,13 @@ sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" -# , {'src': 'ncbi_taxonomy'} -@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}]], indirect=True) +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_genome_metadata"}, + {"src": sample_path / "ncbi_taxonomy"}]], indirect=True) class TestApi: dbc = None # type: UnitTestDB def test_get_public_path(self, multi_dbs): - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: genome = session.query(Genome).filter(Genome.genome_uuid == 'a733574a-93e7-11ec-a39d-005056b38ce3').first() paths = genome.get_public_path(dataset_type='all') @@ -50,7 +50,7 @@ def test_get_public_path(self, multi_dbs): # assert path[0]['path'] == 'Saccharomyces_cerevisiae_S288c/GCA_000146045.2/ensembl/regulation' def test_default_public_path(self, multi_dbs): - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: # Homo sapien GRCH38 genome = session.query(Genome).filter(Genome.genome_uuid == 'a7335667-93e7-11ec-a39d-005056b38ce3').first() @@ -72,7 +72,7 @@ def test_organism_ensembl_name_compat(self, multi_dbs): """ Validate that we can still yse ensembl_name in queries from SQLAlchemy This test will fail when we remove the ORM column for good """ - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: ensembl_name = session.query(Organism).filter(Organism.ensembl_name == 'SAMN12121739').first() biosample_id = session.query(Organism).filter(Organism.biosample_id == 'SAMN12121739').first() diff --git a/src/tests/test_grpc.py b/src/tests/test_grpc.py index 59941a02..3660b6cd 100644 --- a/src/tests/test_grpc.py +++ b/src/tests/test_grpc.py @@ -24,14 +24,14 @@ sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" logger = logging.getLogger(__name__) -@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}, +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_genome_metadata"}, {"src": sample_path / "ncbi_taxonomy"}]], indirect=True) class TestMetadataDB: dbc = None # type: UnitTestDB def test_load_database(self, multi_dbs): - db_test = ReleaseAdaptor(multi_dbs['ensembl_metadata'].dbc.url) + db_test = ReleaseAdaptor(multi_dbs['ensembl_genome_metadata'].dbc.url) assert db_test, "DB should not be empty" @pytest.mark.parametrize( @@ -48,7 +48,7 @@ def test_load_database(self, multi_dbs): ] ) def test_fetch_all_genomes(self, multi_dbs, allow_unreleased, unreleased_only, current_only, output_count): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( allow_unreleased=allow_unreleased, @@ -58,7 +58,7 @@ def test_fetch_all_genomes(self, multi_dbs, allow_unreleased, unreleased_only, c assert len(test) == output_count def test_fetch_with_all_args_no_conflict(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( genome_uuid="a733550b-93e7-11ec-a39d-005056b38ce3", @@ -76,7 +76,7 @@ def test_fetch_with_all_args_no_conflict(self, multi_dbs): assert len(test) == 0 def test_fetch_with_all_args_conflict(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( genome_uuid="a733550b-93e7-11ec-a39d-005056b38ce3", @@ -94,7 +94,7 @@ def test_fetch_with_all_args_conflict(self, multi_dbs): assert len(test) == 0 def test_fetch_releases(self, multi_dbs): - conn = ReleaseAdaptor(multi_dbs['ensembl_metadata'].dbc.url) + conn = ReleaseAdaptor(multi_dbs['ensembl_genome_metadata'].dbc.url) test = conn.fetch_releases(release_id=2) # test the one to many connection assert test[0].EnsemblSite.name == 'Ensembl' @@ -104,36 +104,36 @@ def test_fetch_releases(self, multi_dbs): # currently only have one release, so the testing is not comprehensive def test_fetch_releases_for_genome(self, multi_dbs): - conn = ReleaseAdaptor(multi_dbs['ensembl_metadata'].dbc.url) + conn = ReleaseAdaptor(multi_dbs['ensembl_genome_metadata'].dbc.url) test = conn.fetch_releases_for_genome('ae794660-8751-41cc-8883-b2fcdc7a74e8') assert test[0].EnsemblSite.name == 'Ensembl' def test_fetch_releases_for_dataset(self, multi_dbs): - conn = ReleaseAdaptor(multi_dbs['ensembl_metadata'].dbc.url) + conn = ReleaseAdaptor(multi_dbs['ensembl_genome_metadata'].dbc.url) test = conn.fetch_releases_for_dataset('3d653b2d-aa8d-4f7e-8f92-55f57c7cac3a') assert test[0].EnsemblSite.name == 'Ensembl' assert test[0].EnsemblRelease.label == 'beta-1' def test_fetch_taxonomy_names(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_taxonomy_names(taxonomy_ids=[6239, 511145]) assert test[511145]['scientific_name'] == 'Escherichia coli str. K-12 substr. MG1655' def test_fetch_taxonomy_ids(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_taxonomy_ids(taxonomy_names='Caenorhabditis elegans') assert test[0] == 6239 def test_fetch_genomes(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes(genome_uuid='a7335667-93e7-11ec-a39d-005056b38ce3') assert test[0].Organism.scientific_name == 'Homo sapiens' # def test_fetch_genomes_by_group_division(self, multi_dbs): - # conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + # conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, # taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) # division_filter = 'EnsemblVertebrates' # test = conn.fetch_genomes(group=division_filter) @@ -142,25 +142,25 @@ def test_fetch_genomes(self, multi_dbs): # assert division_filter in division_results def test_fetch_genomes_by_genome_uuid(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_genome_uuid('b00f5b0a-b434-4949-9c05-140826c96cd4') assert test[0].Organism.scientific_name == 'Oryzias latipes' def test_fetch_genome_by_ensembl_and_assembly_name(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes(assembly_name='NOD_ShiLtJ_v1', ensembl_name='SAMN04489827') assert test[0].Organism.scientific_name == 'Mus musculus' def test_fetch_genomes_by_assembly_accession(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_assembly_accession('GCA_000005845.2') assert test[0].Organism.scientific_name == 'Escherichia coli str. K-12 substr. MG1655 str. K12' def test_fetch_genomes_by_assembly_sequence_accession(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences( genome_uuid='a7335667-93e7-11ec-a39d-005056b38ce3', @@ -170,7 +170,7 @@ def test_fetch_genomes_by_assembly_sequence_accession(self, multi_dbs): assert test[0].AssemblySequence.name == 'HG2280_PATCH' def test_fetch_genomes_by_assembly_sequence_accession_empty(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences( genome_uuid='s0m3-r4nd0m-g3n3-uu1d-v4lu3', @@ -180,19 +180,19 @@ def test_fetch_genomes_by_assembly_sequence_accession_empty(self, multi_dbs): assert len(test) == 0 def test_fetch_genomes_by_ensembl_name(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_ensembl_name('SAMN04489826') assert test[0].Organism.scientific_name == 'Mus musculus' def test_fetch_genomes_by_taxonomy_id(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_taxonomy_id(10090) assert test[0].Organism.scientific_name == 'Mus musculus' def test_fetch_genomes_by_scientific_name(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_by_scientific_name( scientific_name='Oryzias latipes', @@ -201,7 +201,7 @@ def test_fetch_genomes_by_scientific_name(self, multi_dbs): assert test[0].Organism.common_name == 'Japanese medaka' def test_fetch_sequences(self, multi_dbs): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences(assembly_uuid='9d6b239c-46dd-4c79-bc29-1089f348d31d') # this test is going to drive me nuts @@ -221,7 +221,7 @@ def test_fetch_sequences(self, multi_dbs): ) def test_fetch_sequences_chromosomal(self, multi_dbs, genome_uuid, assembly_accession, chromosomal_only, expected_output): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences( genome_uuid=genome_uuid, @@ -244,7 +244,7 @@ def test_fetch_sequences_chromosomal(self, multi_dbs, genome_uuid, assembly_acce ) def test_fetch_sequences_by_assembly_seq_name(self, multi_dbs, genome_uuid, assembly_sequence_name, chromosomal_only, expected_output): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_sequences( genome_uuid=genome_uuid, @@ -272,7 +272,7 @@ def test_fetch_genome_dataset_all( dataset_uuid, allow_unreleased, unreleased_only, expected_dataset_uuid, expected_count): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genome_datasets( genome_uuid=genome_uuid, @@ -297,7 +297,7 @@ def test_fetch_genome_dataset_all( ] ) def test_fetch_genome_dataset_by_organism_uuid(self, multi_dbs, organism_uuid, expected_count): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genome_datasets( organism_uuid=organism_uuid, @@ -314,7 +314,7 @@ def test_fetch_genome_dataset_by_organism_uuid(self, multi_dbs, organism_uuid, e ] ) def test_fetch_genome_uuid(self, multi_dbs, production_name, assembly_name, use_default_assembly, expected_output): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( production_name=production_name, @@ -335,7 +335,7 @@ def test_fetch_genome_uuid(self, multi_dbs, production_name, assembly_name, use_ ) def test_fetch_genome_uuid_is_current(self, multi_dbs, production_name, assembly_name, use_default_assembly, expected_output): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( production_name=production_name, @@ -354,7 +354,7 @@ def test_fetch_genome_uuid_is_current(self, multi_dbs, production_name, assembly ] ) def test_fetch_genome_uuid_empty(self, multi_dbs, production_name, assembly_name, use_default_assembly): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes( production_name=production_name, @@ -372,7 +372,7 @@ def test_fetch_genome_uuid_empty(self, multi_dbs, production_name, assembly_name ) def test_fetch_organisms_group_counts(self, multi_dbs, species_taxonomy_id, expected_organism, expected_assemblies_count): - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_organisms_group_counts() # When fetching everything: @@ -396,7 +396,7 @@ def test_fetch_organisms_group_counts(self, multi_dbs, species_taxonomy_id, expe ) def test_fetch_related_assemblies_count(self, multi_dbs, organism_uuid, expected_assemblies_count): conn = GenomeAdaptor( - metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url ) @@ -416,7 +416,7 @@ def test_fetch_related_assemblies_count(self, multi_dbs, organism_uuid, expected def test_fetch_genomes_info(self, multi_dbs, allow_unreleased, output_count, expected_genome_uuid): # FIXME This test takes ages, and generate a lot of unitary queries. SqlAlchemy results needs review before # moving to 2000 - conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_metadata'].dbc.url, + conn = GenomeAdaptor(metadata_uri=multi_dbs['ensembl_genome_metadata'].dbc.url, taxonomy_uri=multi_dbs['ncbi_taxonomy'].dbc.url) test = conn.fetch_genomes_info( allow_unreleased_genomes=allow_unreleased, diff --git a/src/tests/test_protobuf_msg_factory.py b/src/tests/test_protobuf_msg_factory.py index 7442e3c2..1e734a11 100644 --- a/src/tests/test_protobuf_msg_factory.py +++ b/src/tests/test_protobuf_msg_factory.py @@ -26,7 +26,7 @@ sample_path = Path(distribution.location) / "ensembl" / "production" / "metadata" / "api" / "sample" -@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}, +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_genome_metadata"}, {"src": sample_path / "ncbi_taxonomy"}]], indirect=True) class TestClass: diff --git a/src/tests/test_reflection.py b/src/tests/test_reflection.py index 5923891a..df813bb6 100644 --- a/src/tests/test_reflection.py +++ b/src/tests/test_reflection.py @@ -41,20 +41,20 @@ def grpc_servicer(): @pytest.fixture(scope='module') -def grpc_stub_cls(grpc_channel): +def grpc_stub(grpc_channel): from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadataStub - return EnsemblMetadataStub + return EnsemblMetadataStub(grpc_channel) @pytest.fixture(scope='module') -def grpc_server(_grpc_server, grpc_addr): +def grpc_server(_grpc_server, grpc_addr, grpc_add_to_server, grpc_servicer): + grpc_add_to_server(grpc_servicer, _grpc_server) SERVICE_NAMES = ( ensembl_metadata_pb2.DESCRIPTOR.services_by_name['EnsemblMetadata'].full_name, reflection.SERVICE_NAME ) reflection.enable_server_reflection(SERVICE_NAMES, _grpc_server) - # _grpc_server.ad(grpc_addr) _grpc_server.add_insecure_port(grpc_addr) _grpc_server.start() yield _grpc_server @@ -64,12 +64,13 @@ def grpc_server(_grpc_server, grpc_addr): sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" -@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_metadata"}, +@pytest.mark.parametrize("multi_dbs", [[{"src": sample_path / "ensembl_genome_metadata"}, {"src": sample_path / "ncbi_taxonomy"}]], indirect=True) class TestGRPCReflection: + dbc = None - def test_services_discovery(self, grpc_stub_cls, grpc_channel, grpc_servicer, multi_dbs): + def test_services_discovery(self, multi_dbs, grpc_channel, grpc_server): reflection_db = ProtoReflectionDescriptorDatabase(grpc_channel) services = reflection_db.get_services() assert 'ensembl_metadata.EnsemblMetadata' in services @@ -83,11 +84,13 @@ def test_services_discovery(self, grpc_stub_cls, grpc_channel, grpc_servicer, mu assert isinstance(method_desc, MethodDescriptor) def test_dynamic_invoke(self, multi_dbs, grpc_channel): - reflection_db = ProtoReflectionDescriptorDatabase(grpc_channel) - services = reflection_db.get_services() - desc_pool = DescriptorPool(reflection_db) - metadata_service = desc_pool.FindServiceByName('ensembl_metadata.EnsemblMetadata') - method_desc = metadata_service.FindMethodByName("GetTopLevelStatisticsByUUID") - assert isinstance(method_desc, MethodDescriptor) - from google.protobuf.message import Message - + from yagrc import reflector as yagrc_reflector + reflector = yagrc_reflector.GrpcReflectionClient() + reflector.load_protocols(grpc_channel, symbols=["ensembl_metadata.EnsemblMetadata"]) + stub_class = reflector.service_stub_class("ensembl_metadata.EnsemblMetadata") + request_class = reflector.message_class("ensembl_metadata.GenomeUUIDRequest") + print('GRPC CHANNEL', grpc_channel) + stub = stub_class(grpc_channel) + response = stub.GetGenomeByUUID(request_class(genome_uuid='a733550b-93e7-11ec-a39d-005056b38ce3', + release_version=None)) + print(response) diff --git a/src/tests/test_updater.py b/src/tests/test_updater.py index da6174ec..371b449b 100644 --- a/src/tests/test_updater.py +++ b/src/tests/test_updater.py @@ -30,8 +30,11 @@ db_directory = Path(__file__).parent / 'databases' db_directory = db_directory.resolve() +sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" -@pytest.mark.parametrize("multi_dbs", [[{'src': 'ensembl_metadata'}, {'src': 'ncbi_taxonomy'}, + +@pytest.mark.parametrize("multi_dbs", [[{'src': sample_path / 'ensembl_genome_metadata'}, + {'src': sample_path / 'ncbi_taxonomy'}, {'src': db_directory / 'core_1'}, {'src': db_directory / 'core_2'}, {'src': db_directory / 'core_3'}, {'src': db_directory / 'core_4'}, {'src': db_directory / 'core_5'}, {'src': db_directory / 'core_6'}, @@ -44,7 +47,8 @@ class TestUpdater: dbc = None # type: UnitTestDB def test_new_organism(self, multi_dbs): - test = meta_factory(multi_dbs['core_1'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_1'].dbc.url, + multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() @@ -59,7 +63,7 @@ def test_new_organism(self, multi_dbs): assert inserted_genome_uuid is not None # Look for organism, assembly and geneset - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) # Test the species with metadata_db.session_scope() as session: organism = session.query(Organism).where(Organism.biosample_id == 'Jabberwocky').first() @@ -93,7 +97,7 @@ def test_new_organism(self, multi_dbs): assert sequence3 is not None def test_fail_existing_genome_uuid_no_data(self, multi_dbs): - test = meta_factory(multi_dbs['core_2'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_2'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(Exception) as exif: test.process_core() @@ -102,10 +106,10 @@ def test_fail_existing_genome_uuid_no_data(self, multi_dbs): "Please remove it from the meta key and resubmit" in str(exif.value)) def test_update_assembly(self, multi_dbs): - test = meta_factory(multi_dbs['core_3'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_3'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: organism = session.query(Organism).where(Organism.biosample_id == 'Jabberwocky').first() assert organism.scientific_name == 'carol_jabberwocky' @@ -113,10 +117,10 @@ def test_update_assembly(self, multi_dbs): # def test_update_geneset(self, multi_dbs): - test = meta_factory(multi_dbs['core_4'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_4'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: dataset = session.query(Dataset).where( (Dataset.version == "ENS02") & (Dataset.name == 'genebuild') @@ -127,16 +131,16 @@ def test_update_geneset(self, multi_dbs): assert dataset.dataset_type.name == "genebuild" def test_taxonomy_common_name(self, multi_dbs): - test = meta_factory(multi_dbs['core_5'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_5'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: organism = session.query(Organism).where(Organism.biosample_id == 'test_case_5').first() assert organism.common_name == 'sheep' def test_fail_existing_genome_uuid_data_not_match(self, multi_dbs): - test = meta_factory(multi_dbs['core_6'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_6'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(Exception) as exif: test.process_core() @@ -144,10 +148,10 @@ def test_fail_existing_genome_uuid_data_not_match(self, multi_dbs): "The force flag was not specified so the core was not updated." in str(exif.value)) def test_update_unreleased_no_force(self, multi_dbs): - test = meta_factory(multi_dbs['core_7'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_7'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: # Test that assembly seqs have been updated new_seq = session.query(AssemblySequence).where( @@ -205,7 +209,7 @@ def test_update_unreleased_no_force(self, multi_dbs): assert count > 0 def test_update_released_no_force(self, multi_dbs): - test = meta_factory(multi_dbs['core_8'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_8'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url) with pytest.raises(Exception) as exif: test.process_core() @@ -215,11 +219,11 @@ def test_update_released_no_force(self, multi_dbs): exif.value)) def test_update_released_force(self, multi_dbs): - test = meta_factory(multi_dbs['core_9'].dbc.url, multi_dbs['ensembl_metadata'].dbc.url, + test = meta_factory(multi_dbs['core_9'].dbc.url, multi_dbs['ensembl_genome_metadata'].dbc.url, multi_dbs['ncbi_taxonomy'].dbc.url, force=True) # FIXME Should be run # test.process_core() - metadata_db = DBConnection(multi_dbs['ensembl_metadata'].dbc.url) + metadata_db = DBConnection(multi_dbs['ensembl_genome_metadata'].dbc.url) with metadata_db.session_scope() as session: # Test that assembly seqs have not been updated # new_seq = session.query(AssemblySequence).where( diff --git a/src/tests/test_utils.py b/src/tests/test_utils.py index 630c49cd..d272fba4 100644 --- a/src/tests/test_utils.py +++ b/src/tests/test_utils.py @@ -24,9 +24,12 @@ db_directory = Path(__file__).parent / 'databases' db_directory = db_directory.resolve() +sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" -@pytest.mark.parametrize("multi_dbs", [[{'src': 'ensembl_metadata'}, {'src': 'ncbi_taxonomy'}]], indirect=True) +@pytest.mark.parametrize("multi_dbs", + [[{'src': sample_path / 'ensembl_genome_metadata'}, {'src': sample_path / 'ncbi_taxonomy'}]], + indirect=True) class TestUtils: dbc = None # type: UnitTestDB From 917a318c030c80f8f45140a645be90d8909ed0b3 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 11:37:48 +0000 Subject: [PATCH 06/13] Tests for reflection methods. --- .../production/metadata/grpc/adaptors/base.py | 4 ++-- .../metadata/grpc/adaptors/genome.py | 3 ++- .../production/metadata/grpc/config.py | 23 ++++++++++--------- .../production/metadata/grpc/service.py | 4 +++- src/ensembl/production/metadata/grpc/utils.py | 3 ++- src/tests/conftest.py | 2 +- src/tests/test_reflection.py | 21 ++++++----------- 7 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/ensembl/production/metadata/grpc/adaptors/base.py b/src/ensembl/production/metadata/grpc/adaptors/base.py index 8c9b0612..34685bc6 100644 --- a/src/ensembl/production/metadata/grpc/adaptors/base.py +++ b/src/ensembl/production/metadata/grpc/adaptors/base.py @@ -10,12 +10,12 @@ # See the License for the specific language governing permissions and # limitations under the License. from ensembl.database import DBConnection -from ensembl.production.metadata.grpc.config import MetadataConfig as config - +from ensembl.production.metadata.grpc.config import MetadataConfig ##Todo: Add in OrganismAdapator. Subfunction fetches all organism in popular group. and # of genomes from distinct assemblies. # Add in best genome (see doc) # More functions for related genomes +config = MetadataConfig() class BaseAdaptor: diff --git a/src/ensembl/production/metadata/grpc/adaptors/genome.py b/src/ensembl/production/metadata/grpc/adaptors/genome.py index ccccd50d..64e11e5f 100644 --- a/src/ensembl/production/metadata/grpc/adaptors/genome.py +++ b/src/ensembl/production/metadata/grpc/adaptors/genome.py @@ -28,7 +28,8 @@ class GenomeAdaptor(BaseAdaptor): def __init__(self, metadata_uri: str, taxonomy_uri: str): super().__init__(metadata_uri) - self.taxonomy_db = DBConnection(taxonomy_uri, pool_size=MetadataConfig.pool_size, pool_recycle=MetadataConfig.pool_recycle) + self.taxonomy_db = DBConnection(taxonomy_uri, pool_size=MetadataConfig().pool_size, + pool_recycle=MetadataConfig().pool_recycle) def fetch_taxonomy_names(self, taxonomy_ids, synonyms=None): diff --git a/src/ensembl/production/metadata/grpc/config.py b/src/ensembl/production/metadata/grpc/config.py index 9615bece..4102d1a0 100644 --- a/src/ensembl/production/metadata/grpc/config.py +++ b/src/ensembl/production/metadata/grpc/config.py @@ -48,14 +48,15 @@ def parse_boolean_var(var): class MetadataConfig: - metadata_uri = os.environ.get("METADATA_URI", f"mysql://ensembl@localhost:3306/ensembl_genome_metadata") - taxon_uri = os.environ.get("TAXONOMY_URI", f"mysql://ensembl@localhost:3306/ncbi_taxonomy") - pool_size = os.environ.get("POOL_SIZE", 20) - max_overflow = os.environ.get("MAX_OVERFLOW", 0) - pool_recycle = os.environ.get("POOL_RECYCLE", 50) - allow_unreleased = parse_boolean_var(os.environ.get("ALLOW_UNRELEASED", False)) - debug_mode = parse_boolean_var(os.environ.get("DEBUG", False)) - - #@property - #def metadata_uri(self): - # return os.environ.get("METADATA_URI", f"mysql://ensembl@localhost:3306/ensembl_genome_metadata") + + def __init__(self): + super().__init__() + self.metadata_uri = os.environ.get("METADATA_URI", f"mysql://ensembl@localhost:3306/marco_ensembl_genome_metadata") + self.taxon_uri = os.environ.get("TAXONOMY_URI", f"mysql://ensembl@localhost:3306/marco_ncbi_taxonomy") + self.pool_size = os.environ.get("POOL_SIZE", 20) + self.max_overflow = os.environ.get("MAX_OVERFLOW", 0) + self.pool_recycle = os.environ.get("POOL_RECYCLE", 50) + self.allow_unreleased = parse_boolean_var(os.environ.get("ALLOW_UNRELEASED", False)) + self.debug_mode = parse_boolean_var(os.environ.get("DEBUG", False)) + + diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index 23cd69dc..84f2607b 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -15,13 +15,14 @@ from grpc_reflection.v1alpha import reflection -from ensembl.production.metadata.grpc.config import MetadataConfig as cfg +from ensembl.production.metadata.grpc.config import MetadataConfig from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc, ensembl_metadata_pb2 from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer logger = logging.getLogger(__name__) # Determine the logging level based on the value of cfg.debug_mode +cfg = MetadataConfig() log_level = logging.DEBUG if cfg.debug_mode else logging.WARNING logging.basicConfig( @@ -45,6 +46,7 @@ def serve(): try: logger.info(f"Starting GRPC Server from {cfg.metadata_uri}") server.wait_for_termination() + yield server except KeyboardInterrupt: logger.info("KeyboardInterrupt caught, stopping the server...") server.stop(grace=0) # Immediately stop the server diff --git a/src/ensembl/production/metadata/grpc/utils.py b/src/ensembl/production/metadata/grpc/utils.py index 3d84087d..1d870e18 100644 --- a/src/ensembl/production/metadata/grpc/utils.py +++ b/src/ensembl/production/metadata/grpc/utils.py @@ -14,13 +14,14 @@ from ensembl.production.metadata.api.models import Genome from ensembl.production.metadata.grpc import ensembl_metadata_pb2 -from ensembl.production.metadata.grpc.config import MetadataConfig as cfg +from ensembl.production.metadata.grpc.config import MetadataConfig from ensembl.production.metadata.grpc.adaptors.genome import GenomeAdaptor from ensembl.production.metadata.grpc.adaptors.release import ReleaseAdaptor import ensembl.production.metadata.grpc.protobuf_msg_factory as msg_factory logger = logging.getLogger(__name__) +cfg = MetadataConfig() def connect_to_db(): conn = GenomeAdaptor( diff --git a/src/tests/conftest.py b/src/tests/conftest.py index 41d36a9c..bb1aed01 100644 --- a/src/tests/conftest.py +++ b/src/tests/conftest.py @@ -26,7 +26,7 @@ def pytest_configure(config: Config) -> None: pytest.dbs_dir = Path(__file__).parent / 'src' / 'ensembl' / 'production' / 'metadata' / 'api' / 'sample' -@pytest.fixture(scope="class", autouse=True) +@pytest.fixture(scope="module", autouse=True) def engine(multi_dbs): os.environ["METADATA_URI"] = multi_dbs["ensembl_genome_metadata"].dbc.url os.environ["TAXONOMY_URI"] = multi_dbs["ncbi_taxonomy"].dbc.url diff --git a/src/tests/test_reflection.py b/src/tests/test_reflection.py index df813bb6..4840aabf 100644 --- a/src/tests/test_reflection.py +++ b/src/tests/test_reflection.py @@ -19,6 +19,7 @@ from google.protobuf.descriptor_pool import DescriptorPool from grpc_reflection.v1alpha import reflection from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase +from yagrc import reflector as yagrc_reflector from ensembl.production.metadata.grpc import ensembl_metadata_pb2 from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadata @@ -34,19 +35,11 @@ def grpc_add_to_server(): @pytest.fixture(scope='module') -def grpc_servicer(): +def grpc_servicer(multi_dbs, engine): from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer - return EnsemblMetadataServicer() -@pytest.fixture(scope='module') -def grpc_stub(grpc_channel): - from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadataStub - - return EnsemblMetadataStub(grpc_channel) - - @pytest.fixture(scope='module') def grpc_server(_grpc_server, grpc_addr, grpc_add_to_server, grpc_servicer): grpc_add_to_server(grpc_servicer, _grpc_server) @@ -83,14 +76,14 @@ def test_services_discovery(self, multi_dbs, grpc_channel, grpc_server): method_desc = metadata_service.FindMethodByName(method_name) assert isinstance(method_desc, MethodDescriptor) - def test_dynamic_invoke(self, multi_dbs, grpc_channel): - from yagrc import reflector as yagrc_reflector + def test_dynamic_invoke(self, multi_dbs, grpc_channel, grpc_server): + logger.warning("multi dbs", multi_dbs) reflector = yagrc_reflector.GrpcReflectionClient() reflector.load_protocols(grpc_channel, symbols=["ensembl_metadata.EnsemblMetadata"]) stub_class = reflector.service_stub_class("ensembl_metadata.EnsemblMetadata") request_class = reflector.message_class("ensembl_metadata.GenomeUUIDRequest") - print('GRPC CHANNEL', grpc_channel) stub = stub_class(grpc_channel) - response = stub.GetGenomeByUUID(request_class(genome_uuid='a733550b-93e7-11ec-a39d-005056b38ce3', + response = stub.GetGenomeByUUID(request_class(genome_uuid='a73351f7-93e7-11ec-a39d-005056b38ce3', release_version=None)) - print(response) + assert response.genome_uuid == 'a73351f7-93e7-11ec-a39d-005056b38ce3' + assert response.assembly.accession == 'GCA_000005845.2' From f58a1eeef67c2a326790aa47f53ca96069d9ad40 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 11:42:35 +0000 Subject: [PATCH 07/13] Removed direct call to constructor and assign config as attribute. --- src/ensembl/production/metadata/grpc/adaptors/base.py | 6 ++++-- src/ensembl/production/metadata/grpc/adaptors/genome.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ensembl/production/metadata/grpc/adaptors/base.py b/src/ensembl/production/metadata/grpc/adaptors/base.py index 34685bc6..e88a017d 100644 --- a/src/ensembl/production/metadata/grpc/adaptors/base.py +++ b/src/ensembl/production/metadata/grpc/adaptors/base.py @@ -12,15 +12,17 @@ from ensembl.database import DBConnection from ensembl.production.metadata.grpc.config import MetadataConfig + ##Todo: Add in OrganismAdapator. Subfunction fetches all organism in popular group. and # of genomes from distinct assemblies. # Add in best genome (see doc) # More functions for related genomes -config = MetadataConfig() class BaseAdaptor: def __init__(self, metadata_uri): - self.metadata_db = DBConnection(metadata_uri, pool_size=config.pool_size, pool_recycle=config.pool_recycle) + self.config = MetadataConfig() + self.metadata_db = DBConnection(metadata_uri, pool_size=self.config.pool_size, + pool_recycle=self.config.pool_recycle) def check_parameter(param): diff --git a/src/ensembl/production/metadata/grpc/adaptors/genome.py b/src/ensembl/production/metadata/grpc/adaptors/genome.py index 64e11e5f..5977ff6f 100644 --- a/src/ensembl/production/metadata/grpc/adaptors/genome.py +++ b/src/ensembl/production/metadata/grpc/adaptors/genome.py @@ -28,8 +28,8 @@ class GenomeAdaptor(BaseAdaptor): def __init__(self, metadata_uri: str, taxonomy_uri: str): super().__init__(metadata_uri) - self.taxonomy_db = DBConnection(taxonomy_uri, pool_size=MetadataConfig().pool_size, - pool_recycle=MetadataConfig().pool_recycle) + self.taxonomy_db = DBConnection(taxonomy_uri, pool_size=self.config.pool_size, + pool_recycle=self.config.pool_recycle) def fetch_taxonomy_names(self, taxonomy_ids, synonyms=None): From 6078c5b490887562c349bf31f77970b57f1b886f Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 11:52:24 +0000 Subject: [PATCH 08/13] Moved contest to the right place --- src/tests/conftest.py => conftest.py | 0 src/ensembl/production/metadata/grpc/config.py | 5 ++--- 2 files changed, 2 insertions(+), 3 deletions(-) rename src/tests/conftest.py => conftest.py (100%) diff --git a/src/tests/conftest.py b/conftest.py similarity index 100% rename from src/tests/conftest.py rename to conftest.py diff --git a/src/ensembl/production/metadata/grpc/config.py b/src/ensembl/production/metadata/grpc/config.py index 4102d1a0..10e518d2 100644 --- a/src/ensembl/production/metadata/grpc/config.py +++ b/src/ensembl/production/metadata/grpc/config.py @@ -51,12 +51,11 @@ class MetadataConfig: def __init__(self): super().__init__() - self.metadata_uri = os.environ.get("METADATA_URI", f"mysql://ensembl@localhost:3306/marco_ensembl_genome_metadata") + self.metadata_uri = os.environ.get("METADATA_URI", + f"mysql://ensembl@localhost:3306/marco_ensembl_genome_metadata") self.taxon_uri = os.environ.get("TAXONOMY_URI", f"mysql://ensembl@localhost:3306/marco_ncbi_taxonomy") self.pool_size = os.environ.get("POOL_SIZE", 20) self.max_overflow = os.environ.get("MAX_OVERFLOW", 0) self.pool_recycle = os.environ.get("POOL_RECYCLE", 50) self.allow_unreleased = parse_boolean_var(os.environ.get("ALLOW_UNRELEASED", False)) self.debug_mode = parse_boolean_var(os.environ.get("DEBUG", False)) - - From cd67c5b71a5a72053ded73ce31f3666d2098fc46 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 12:03:13 +0000 Subject: [PATCH 09/13] Reorganised conftest.py files for the best. --- conftest.py | 39 +---------------- src/tests/conftest.py | 83 ++++++++++++++++++++++++++++++++++++ src/tests/test_reflection.py | 33 +------------- 3 files changed, 86 insertions(+), 69 deletions(-) create mode 100644 src/tests/conftest.py diff --git a/conftest.py b/conftest.py index bb1aed01..35382af5 100644 --- a/conftest.py +++ b/conftest.py @@ -9,42 +9,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os -from pathlib import Path - -from _pytest.config import Config -import pytest -import sqlalchemy as db - -from ensembl.production.metadata.grpc.adaptors.genome import GenomeAdaptor -from ensembl.production.metadata.grpc.adaptors.release import ReleaseAdaptor - +"""Global pytest configuration for Ensembl Metadata GRPC tests.""" pytest_plugins = ("ensembl.plugins.pytest_unittest",) - - -def pytest_configure(config: Config) -> None: - pytest.dbs_dir = Path(__file__).parent / 'src' / 'ensembl' / 'production' / 'metadata' / 'api' / 'sample' - - -@pytest.fixture(scope="module", autouse=True) -def engine(multi_dbs): - os.environ["METADATA_URI"] = multi_dbs["ensembl_genome_metadata"].dbc.url - os.environ["TAXONOMY_URI"] = multi_dbs["ncbi_taxonomy"].dbc.url - yield db.create_engine(multi_dbs["ensembl_genome_metadata"].dbc.url) - - -@pytest.fixture(scope="class") -def genome_db_conn(multi_dbs): - genome_conn = GenomeAdaptor( - metadata_uri=multi_dbs["ensembl_genome_metadata"].dbc.url, - taxonomy_uri=multi_dbs["ncbi_taxonomy"].dbc.url - ) - yield genome_conn - - -@pytest.fixture(scope="class") -def release_db_conn(multi_dbs): - release_conn = ReleaseAdaptor( - metadata_uri=multi_dbs["ensembl_genome_metadata"].dbc.url - ) - yield release_conn diff --git a/src/tests/conftest.py b/src/tests/conftest.py new file mode 100644 index 00000000..43f86692 --- /dev/null +++ b/src/tests/conftest.py @@ -0,0 +1,83 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Necessary fixtures for our GRPC API tests """ +import os +from pathlib import Path + +import pytest +import sqlalchemy as db +from _pytest.config import Config + +from ensembl.production.metadata.grpc.adaptors.genome import GenomeAdaptor +from ensembl.production.metadata.grpc.adaptors.release import ReleaseAdaptor + +from ensembl.production.metadata.grpc import ensembl_metadata_pb2 +from grpc_reflection.v1alpha import reflection + + +def pytest_configure(config: Config) -> None: + pytest.dbs_dir = Path(__file__).parent / 'src' / 'ensembl' / 'production' / 'metadata' / 'api' / 'sample' + + +@pytest.fixture(scope="module", autouse=True) +def engine(multi_dbs): + os.environ["METADATA_URI"] = multi_dbs["ensembl_genome_metadata"].dbc.url + os.environ["TAXONOMY_URI"] = multi_dbs["ncbi_taxonomy"].dbc.url + yield db.create_engine(multi_dbs["ensembl_genome_metadata"].dbc.url) + + +@pytest.fixture(scope="class") +def genome_db_conn(multi_dbs): + genome_conn = GenomeAdaptor( + metadata_uri=multi_dbs["ensembl_genome_metadata"].dbc.url, + taxonomy_uri=multi_dbs["ncbi_taxonomy"].dbc.url + ) + yield genome_conn + + +@pytest.fixture(scope="class") +def release_db_conn(multi_dbs): + release_conn = ReleaseAdaptor( + metadata_uri=multi_dbs["ensembl_genome_metadata"].dbc.url + ) + yield release_conn + + +@pytest.fixture(scope='module') +def grpc_add_to_server(): + from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import add_EnsemblMetadataServicer_to_server + + return add_EnsemblMetadataServicer_to_server + + +@pytest.fixture(scope='module') +def grpc_servicer(multi_dbs, engine): + from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer + return EnsemblMetadataServicer() + + +@pytest.fixture(scope='module') +def grpc_server(_grpc_server, grpc_addr, grpc_add_to_server, grpc_servicer): + grpc_add_to_server(grpc_servicer, _grpc_server) + SERVICE_NAMES = ( + ensembl_metadata_pb2.DESCRIPTOR.services_by_name['EnsemblMetadata'].full_name, + reflection.SERVICE_NAME + ) + reflection.enable_server_reflection(SERVICE_NAMES, _grpc_server) + _grpc_server.add_insecure_port(grpc_addr) + _grpc_server.start() + yield _grpc_server + _grpc_server.stop(grace=None) diff --git a/src/tests/test_reflection.py b/src/tests/test_reflection.py index 4840aabf..8ac20342 100644 --- a/src/tests/test_reflection.py +++ b/src/tests/test_reflection.py @@ -17,43 +17,12 @@ import pytest from google.protobuf.descriptor import MethodDescriptor from google.protobuf.descriptor_pool import DescriptorPool -from grpc_reflection.v1alpha import reflection from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase from yagrc import reflector as yagrc_reflector -from ensembl.production.metadata.grpc import ensembl_metadata_pb2 -from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadata logger = logging.getLogger(__name__) - -@pytest.fixture(scope='module') -def grpc_add_to_server(): - from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import add_EnsemblMetadataServicer_to_server - - return add_EnsemblMetadataServicer_to_server - - -@pytest.fixture(scope='module') -def grpc_servicer(multi_dbs, engine): - from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer - return EnsemblMetadataServicer() - - -@pytest.fixture(scope='module') -def grpc_server(_grpc_server, grpc_addr, grpc_add_to_server, grpc_servicer): - grpc_add_to_server(grpc_servicer, _grpc_server) - SERVICE_NAMES = ( - ensembl_metadata_pb2.DESCRIPTOR.services_by_name['EnsemblMetadata'].full_name, - reflection.SERVICE_NAME - ) - reflection.enable_server_reflection(SERVICE_NAMES, _grpc_server) - _grpc_server.add_insecure_port(grpc_addr) - _grpc_server.start() - yield _grpc_server - _grpc_server.stop(grace=None) - - sample_path = Path(__file__).parent.parent / "ensembl" / "production" / "metadata" / "api" / "sample" @@ -64,6 +33,8 @@ class TestGRPCReflection: dbc = None def test_services_discovery(self, multi_dbs, grpc_channel, grpc_server): + from ensembl.production.metadata.grpc.ensembl_metadata_pb2_grpc import EnsemblMetadata + reflection_db = ProtoReflectionDescriptorDatabase(grpc_channel) services = reflection_db.get_services() assert 'ensembl_metadata.EnsemblMetadata' in services From 6c12d806eb6dcdf07e9170588f24bcc795014b49 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 13:43:38 +0000 Subject: [PATCH 10/13] Added missing yagrc in requirements --- requirements.in | 1 + requirements.txt | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/requirements.in b/requirements.in index 53f572ee..02963670 100644 --- a/requirements.in +++ b/requirements.in @@ -4,3 +4,4 @@ grpcio-tools grpcio-reflection sqlalchemy types-pymysql +yagrc \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6f989fe2..69e46759 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,8 +21,11 @@ grpcio==1.62.0 # -r requirements.in # grpcio-reflection # grpcio-tools + # yagrc grpcio-reflection==1.62.0 - # via -r requirements.in + # via + # -r requirements.in + # yagrc grpcio-tools==1.62.0 # via -r requirements.in idna==3.6 @@ -39,6 +42,7 @@ protobuf==4.25.3 # via # grpcio-reflection # grpcio-tools + # yagrc pytest==8.0.1 # via # ensembl-py @@ -64,6 +68,8 @@ types-pymysql==1.1.0.1 # via -r requirements.in urllib3==2.2.1 # via requests +yagrc==1.1.2 + # via -r requirements.in # The following packages are considered to be unsafe in a requirements file: # setuptools From b88571c290a3b5a13df1d835e27911919d45be3b Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 13:56:38 +0000 Subject: [PATCH 11/13] Updated Config usage. --- .../production/metadata/grpc/service.py | 17 +++++------ src/ensembl/production/metadata/grpc/utils.py | 28 +++++++++++-------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index 84f2607b..14230dca 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -21,17 +21,14 @@ logger = logging.getLogger(__name__) -# Determine the logging level based on the value of cfg.debug_mode -cfg = MetadataConfig() -log_level = logging.DEBUG if cfg.debug_mode else logging.WARNING - -logging.basicConfig( - level=log_level, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) - - def serve(): + cfg = MetadataConfig() + log_level = logging.DEBUG if cfg.debug_mode else logging.WARNING + + logging.basicConfig( + level=log_level, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) ensembl_metadata_pb2_grpc.add_EnsemblMetadataServicer_to_server( EnsemblMetadataServicer(), server diff --git a/src/ensembl/production/metadata/grpc/utils.py b/src/ensembl/production/metadata/grpc/utils.py index 1d870e18..b39d48d7 100644 --- a/src/ensembl/production/metadata/grpc/utils.py +++ b/src/ensembl/production/metadata/grpc/utils.py @@ -21,9 +21,9 @@ logger = logging.getLogger(__name__) -cfg = MetadataConfig() def connect_to_db(): + cfg = MetadataConfig() conn = GenomeAdaptor( metadata_uri=cfg.metadata_uri, taxonomy_uri=cfg.taxon_uri @@ -120,6 +120,7 @@ def get_assembly_information(db_conn, assembly_uuid): logger.debug("No assembly information was found.") return msg_factory.create_assembly_info() + # TODO: move this function to protobuf_msg_factory.py file def create_genome_with_attributes_and_count(db_conn, genome, release_version): # we fetch attributes related to that genome @@ -148,7 +149,7 @@ def get_genomes_from_assembly_accession_iterator(db_conn, assembly_accession, re if not assembly_accession: logging.warning("Missing or Empty Assembly accession field.") return msg_factory.create_genome() - + cfg = MetadataConfig() # TODO: Add try except to the other functions as well try: genome_results = db_conn.fetch_genomes( @@ -170,7 +171,7 @@ def get_species_information(db_conn, genome_uuid): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_species() - + cfg = MetadataConfig() species_results = db_conn.fetch_genomes( genome_uuid=genome_uuid, allow_unreleased=cfg.allow_unreleased @@ -193,7 +194,7 @@ def get_sub_species_info(db_conn, organism_uuid, group): if not organism_uuid: logger.warning("Missing or Empty Organism UUID field.") return msg_factory.create_sub_species() - + cfg = MetadataConfig() sub_species_results = db_conn.fetch_genomes( organism_uuid=organism_uuid, group=group, @@ -222,6 +223,7 @@ def get_sub_species_info(db_conn, organism_uuid, group): def get_genome_uuid(db_conn, production_name, assembly_name, use_default=False): + cfg = MetadataConfig() if not production_name or not assembly_name: logger.warning("Missing or Empty production_name or assembly_name field.") return msg_factory.create_genome_uuid() @@ -251,7 +253,7 @@ def get_genome_by_uuid(db_conn, genome_uuid, release_version): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_genome() - + cfg = MetadataConfig() genome_results = db_conn.fetch_genomes( genome_uuid=genome_uuid, release_version=release_version, @@ -306,7 +308,7 @@ def get_genome_by_name(db_conn, ensembl_name, site_name, release_version): if not ensembl_name and not site_name: logger.warning("Missing or Empty ensembl_name and site_name field.") return msg_factory.create_genome() - + cfg = MetadataConfig() genome_results = db_conn.fetch_genomes( ensembl_name=ensembl_name, site_name=site_name, @@ -331,7 +333,7 @@ def get_datasets_list_by_uuid(db_conn, genome_uuid, release_version): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_datasets() - + cfg = MetadataConfig() datasets_results = db_conn.fetch_genome_datasets( genome_uuid=genome_uuid, # fetch all datasets, default is 'assembly' only @@ -423,6 +425,7 @@ def genome_assembly_sequence_region(db_conn, genome_uuid, sequence_region_name): def release_iterator(metadata_db, site_name, release_version, current_only): + cfg = MetadataConfig() conn = ReleaseAdaptor(metadata_uri=cfg.metadata_uri) # set release_version/site_name to None if it's an empty list @@ -436,21 +439,23 @@ def release_iterator(metadata_db, site_name, release_version, current_only): ) for result in release_results: - logging.debug(f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") + logging.debug( + f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") yield msg_factory.create_release(result) def release_by_uuid_iterator(metadata_db, genome_uuid): if not genome_uuid: return - + cfg = MetadataConfig() conn = ReleaseAdaptor(metadata_uri=cfg.metadata_uri) release_results = conn.fetch_releases_for_genome( genome_uuid=genome_uuid, ) for result in release_results: - logging.debug(f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") + logging.debug( + f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") yield msg_factory.create_release(result) @@ -480,7 +485,7 @@ def get_genome_uuid_by_tag(db_conn, genome_tag): if not genome_tag: logger.warning("Missing or Empty Genome tag field.") return msg_factory.create_genome_uuid() - + cfg = MetadataConfig() genome_uuid_result = db_conn.fetch_genomes( genome_tag=genome_tag, allow_unreleased=cfg.allow_unreleased @@ -501,7 +506,6 @@ def get_genome_uuid_by_tag(db_conn, genome_tag): def get_ftp_links(db_conn, genome_uuid, dataset_type, release_version): - # Request is sending an empty string '' instead of None when # an input parameter is not supplied by the user if not genome_uuid: From a3305766ff5c14a8c96996bd45983555a4637303 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 14:11:58 +0000 Subject: [PATCH 12/13] Forces ENV vars in travis --- .travis.yml | 10 ++--- .../production/metadata/grpc/config.py | 2 +- .../production/metadata/grpc/service.py | 3 +- src/ensembl/production/metadata/grpc/utils.py | 37 +++++++------------ 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/.travis.yml b/.travis.yml index a37434e1..dca6577a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,11 +6,11 @@ python: services: - mysql before_script: - - pip install -r requirements.txt - pip install -r requirements-test.txt - pip install . - - export PYTHONPATH=$PYTHONPATH:$PWD/src - +env: + - METADATA_URI=mysql://travis@127.0.0.1:3306/travis_ensembl_genome_metadata + - TAXONOMY_URI=mysql://travis@127.0.0.1:3306/travis_ncbi_taxonomy script: - - echo "DB_HOST ENV $DB_HOST" - - coverage run -m pytest --server mysql://travis@127.0.0.1:3306 + - echo "DB_HOST $METADATA_URI $TAXONOMY_URI" + - coverage run -m pytest -c pyproject.toml --server mysql://travis@127.0.0.1:3306 diff --git a/src/ensembl/production/metadata/grpc/config.py b/src/ensembl/production/metadata/grpc/config.py index 10e518d2..1c18372c 100644 --- a/src/ensembl/production/metadata/grpc/config.py +++ b/src/ensembl/production/metadata/grpc/config.py @@ -52,7 +52,7 @@ class MetadataConfig: def __init__(self): super().__init__() self.metadata_uri = os.environ.get("METADATA_URI", - f"mysql://ensembl@localhost:3306/marco_ensembl_genome_metadata") + f"mysql://ensembl@localhost:3306/ensembl_genome_metadata") self.taxon_uri = os.environ.get("TAXONOMY_URI", f"mysql://ensembl@localhost:3306/marco_ncbi_taxonomy") self.pool_size = os.environ.get("POOL_SIZE", 20) self.max_overflow = os.environ.get("MAX_OVERFLOW", 0) diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index 14230dca..684e3405 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -21,6 +21,7 @@ logger = logging.getLogger(__name__) + def serve(): cfg = MetadataConfig() log_level = logging.DEBUG if cfg.debug_mode else logging.WARNING @@ -42,6 +43,7 @@ def serve(): server.start() try: logger.info(f"Starting GRPC Server from {cfg.metadata_uri}") + logger.info(f"DEBUG: {cfg.debug_mode}") server.wait_for_termination() yield server except KeyboardInterrupt: @@ -52,5 +54,4 @@ def serve(): if __name__ == "__main__": logger.info("gRPC server starting on port 50051...") - logger.info(f"DEBUG: {cfg.debug_mode}") serve() diff --git a/src/ensembl/production/metadata/grpc/utils.py b/src/ensembl/production/metadata/grpc/utils.py index b39d48d7..718ecb2f 100644 --- a/src/ensembl/production/metadata/grpc/utils.py +++ b/src/ensembl/production/metadata/grpc/utils.py @@ -23,10 +23,9 @@ def connect_to_db(): - cfg = MetadataConfig() conn = GenomeAdaptor( - metadata_uri=cfg.metadata_uri, - taxonomy_uri=cfg.taxon_uri + metadata_uri=MetadataConfig().metadata_uri, + taxonomy_uri=MetadataConfig().taxon_uri ) return conn @@ -149,12 +148,11 @@ def get_genomes_from_assembly_accession_iterator(db_conn, assembly_accession, re if not assembly_accession: logging.warning("Missing or Empty Assembly accession field.") return msg_factory.create_genome() - cfg = MetadataConfig() # TODO: Add try except to the other functions as well try: genome_results = db_conn.fetch_genomes( assembly_accession=assembly_accession, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) except Exception as e: logging.error(f"Error fetching genomes: {e}") @@ -171,10 +169,9 @@ def get_species_information(db_conn, genome_uuid): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_species() - cfg = MetadataConfig() species_results = db_conn.fetch_genomes( genome_uuid=genome_uuid, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(species_results) == 1: tax_id = species_results[0].Organism.taxonomy_id @@ -194,11 +191,10 @@ def get_sub_species_info(db_conn, organism_uuid, group): if not organism_uuid: logger.warning("Missing or Empty Organism UUID field.") return msg_factory.create_sub_species() - cfg = MetadataConfig() sub_species_results = db_conn.fetch_genomes( organism_uuid=organism_uuid, group=group, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) species_name = [] @@ -223,7 +219,6 @@ def get_sub_species_info(db_conn, organism_uuid, group): def get_genome_uuid(db_conn, production_name, assembly_name, use_default=False): - cfg = MetadataConfig() if not production_name or not assembly_name: logger.warning("Missing or Empty production_name or assembly_name field.") return msg_factory.create_genome_uuid() @@ -232,7 +227,7 @@ def get_genome_uuid(db_conn, production_name, assembly_name, use_default=False): production_name=production_name, assembly_name=assembly_name, use_default_assembly=use_default, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(genome_uuid_result) == 1: @@ -253,11 +248,10 @@ def get_genome_by_uuid(db_conn, genome_uuid, release_version): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_genome() - cfg = MetadataConfig() genome_results = db_conn.fetch_genomes( genome_uuid=genome_uuid, release_version=release_version, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(genome_results) == 1: @@ -308,12 +302,11 @@ def get_genome_by_name(db_conn, ensembl_name, site_name, release_version): if not ensembl_name and not site_name: logger.warning("Missing or Empty ensembl_name and site_name field.") return msg_factory.create_genome() - cfg = MetadataConfig() genome_results = db_conn.fetch_genomes( ensembl_name=ensembl_name, site_name=site_name, release_version=release_version, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(genome_results) == 1: response_data = create_genome_with_attributes_and_count( @@ -333,13 +326,12 @@ def get_datasets_list_by_uuid(db_conn, genome_uuid, release_version): if not genome_uuid: logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_datasets() - cfg = MetadataConfig() datasets_results = db_conn.fetch_genome_datasets( genome_uuid=genome_uuid, # fetch all datasets, default is 'assembly' only dataset_type_name="all", release_version=release_version, - allow_unreleased=cfg.allow_unreleased, + allow_unreleased=MetadataConfig().allow_unreleased, dataset_attributes=True ) @@ -425,8 +417,7 @@ def genome_assembly_sequence_region(db_conn, genome_uuid, sequence_region_name): def release_iterator(metadata_db, site_name, release_version, current_only): - cfg = MetadataConfig() - conn = ReleaseAdaptor(metadata_uri=cfg.metadata_uri) + conn = ReleaseAdaptor(metadata_uri=MetadataConfig().metadata_uri) # set release_version/site_name to None if it's an empty list release_version = release_version or None @@ -447,8 +438,8 @@ def release_iterator(metadata_db, site_name, release_version, current_only): def release_by_uuid_iterator(metadata_db, genome_uuid): if not genome_uuid: return - cfg = MetadataConfig() - conn = ReleaseAdaptor(metadata_uri=cfg.metadata_uri) + + conn = ReleaseAdaptor(metadata_uri=MetadataConfig().metadata_uri) release_results = conn.fetch_releases_for_genome( genome_uuid=genome_uuid, ) @@ -485,10 +476,10 @@ def get_genome_uuid_by_tag(db_conn, genome_tag): if not genome_tag: logger.warning("Missing or Empty Genome tag field.") return msg_factory.create_genome_uuid() - cfg = MetadataConfig() + genome_uuid_result = db_conn.fetch_genomes( genome_tag=genome_tag, - allow_unreleased=cfg.allow_unreleased + allow_unreleased=MetadataConfig().allow_unreleased ) if len(genome_uuid_result) == 1: From 506a9505b2f5a04541ce804bbc193f9d7768db08 Mon Sep 17 00:00:00 2001 From: Marc Chakiachvili Date: Thu, 22 Feb 2024 14:13:25 +0000 Subject: [PATCH 13/13] Forces ENV vars in travis --- .travis.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index dca6577a..b6d95510 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,9 +8,6 @@ services: before_script: - pip install -r requirements-test.txt - pip install . -env: - - METADATA_URI=mysql://travis@127.0.0.1:3306/travis_ensembl_genome_metadata - - TAXONOMY_URI=mysql://travis@127.0.0.1:3306/travis_ncbi_taxonomy script: - echo "DB_HOST $METADATA_URI $TAXONOMY_URI" - coverage run -m pytest -c pyproject.toml --server mysql://travis@127.0.0.1:3306