From 21fd9614f03e481980ba1392ace4d01cd5f94d0d Mon Sep 17 00:00:00 2001 From: Bilal Date: Wed, 24 Jan 2024 13:39:19 +0000 Subject: [PATCH 1/5] fix ALLOW_UNRELEASED and started adding logging --- .../metadata/grpc/adaptors/genome.py | 15 +++++--- .../production/metadata/grpc/config.py | 37 ++++++++++++++++++- .../production/metadata/grpc/service.py | 15 +++++++- .../production/metadata/grpc/servicer.py | 23 ++++++++++++ src/ensembl/production/metadata/grpc/utils.py | 17 +++++++-- 5 files changed, 96 insertions(+), 11 deletions(-) diff --git a/src/ensembl/production/metadata/grpc/adaptors/genome.py b/src/ensembl/production/metadata/grpc/adaptors/genome.py index 09551860..5e1cb2b2 100644 --- a/src/ensembl/production/metadata/grpc/adaptors/genome.py +++ b/src/ensembl/production/metadata/grpc/adaptors/genome.py @@ -199,14 +199,15 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ if allow_unreleased: # fetch everything (released + unreleased) - pass + logger.info("ALLOW_UNRELEASED is set to True...") elif unreleased_only: # fetch unreleased only # this filter will get all Genome entries where there's no associated GenomeRelease # the tilde (~) symbol is used for negation. genome_select = genome_select.filter(~Genome.genome_releases.any()) + logger.info("Fetching only unreleased data...") else: - # fetch released only + logger.info("Fetching released data only...") # Check if genome is released # TODO: why did I add this check?! -> removing this breaks the test_update tests with self.metadata_db.session_scope() as session: @@ -220,6 +221,7 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ is_genome_released = session.execute(prep_query).first() if is_genome_released: + logger.info(f"Genome UUID '{genome_uuid}' is released") # Include release related info if released_only is True genome_select = genome_select.add_columns(GenomeRelease, EnsemblRelease, EnsemblSite) \ .join(GenomeRelease, Genome.genome_id == GenomeRelease.genome_id) \ @@ -240,7 +242,11 @@ def fetch_genomes(self, genome_id=None, genome_uuid=None, genome_tag=None, organ if release_type is not None: genome_select = genome_select.filter(EnsemblRelease.release_type == release_type) - # print(f"genome_select query ====> {str(genome_select)}") + else: + logger.info(f"Genome UUID '{genome_uuid}' doesn't exist or it's not released yet.") + return [] + + logger.debug(genome_select) with self.metadata_db.session_scope() as session: session.expire_on_commit = False return session.execute(genome_select.order_by("ensembl_name")).all() @@ -549,7 +555,6 @@ def fetch_genome_datasets(self, genome_id=None, genome_uuid=None, organism_uuid= if release_version: genome_select = genome_select.filter(EnsemblRelease.version <= release_version) - # print(f"genome_select str ====> {str(genome_select)}") logger.debug(genome_select) with self.metadata_db.session_scope() as session: session.expire_on_commit = False @@ -669,6 +674,6 @@ def fetch_related_assemblies_count(self, organism_uuid, release_version=None): raise NotImplementedError('Not implemented yet') pass - # print(f"query ---> {query}") + logger.debug(query) with self.metadata_db.session_scope() as session: return session.execute(query).scalar() diff --git a/src/ensembl/production/metadata/grpc/config.py b/src/ensembl/production/metadata/grpc/config.py index ca57a3ca..306b71fb 100644 --- a/src/ensembl/production/metadata/grpc/config.py +++ b/src/ensembl/production/metadata/grpc/config.py @@ -10,6 +10,41 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +import warnings + + +def parse_boolean_var(var): + """ + Parse an input variable into a boolean value. + + This function interprets the input `var` and attempts to convert it into a boolean value (`True` or `False`). + It handles booleans and strings specifically, and defaults to `False` for other types with a warning. + + Args: + var (bool|str|any): The variable to parse. This can be a boolean, a string, or any other type. + - If it's a boolean, it's returned as-is. + - If it's a string, it's considered `True` unless it's 'f', 'false', 'no', 'none', '0' + or 'n' (case-insensitive), or it's an empty string. + - For other types, a warning is issued, and `False` is returned. + + Returns: + bool: The parsed boolean value. Returns `True` or `False` based on the input: + - `True` if `var` is `True`, a non-falsy string not equal to 'f', 'false', 'no', 'none', '0', or 'n'. + - `False` if `var` is `False`, a string equal to 'f', 'false', 'no', 'none', '0', or 'n', any non-string and + non-boolean input, or an empty string. + + Raises: + Warning: If `var` is not a boolean or a string, a warning is raised indicating the input + couldn't be parsed to a boolean. + """ + if isinstance(var, bool): + return var + elif isinstance(var, str): + return not ((var.lower() in ("f", "false", "no", "none", "0", "n")) or (not var)) + else: + # default to false, something is wrong. + warnings.warn(f"Var {var} couldn't be parsed to boolean") + return False class MetadataConfig: @@ -18,4 +53,4 @@ class MetadataConfig: pool_size = os.environ.get("POOL_SIZE", 20) max_overflow = os.environ.get("MAX_OVERFLOW", 0) pool_recycle = os.environ.get("POOL_RECYCLE", 50) - allow_unreleased = os.environ.get("ALLOW_UNRELEASED", False) + allow_unreleased = parse_boolean_var(os.environ.get("ALLOW_UNRELEASED", False)) diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index 75a145ca..da21f6e4 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -16,6 +16,12 @@ from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer +logger = logging.getLogger(__name__) +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) + def serve(): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) @@ -24,9 +30,14 @@ def serve(): ) server.add_insecure_port("[::]:50051") server.start() - server.wait_for_termination() + try: + server.wait_for_termination() + except KeyboardInterrupt: + logger.info("KeyboardInterrupt caught, stopping the server...") + server.stop(grace=0) # Immediately stop the server + logger.info("gRPC server has shut down gracefully") if __name__ == "__main__": - logging.basicConfig() + logger.info("gRPC server starting on port 50051...") serve() diff --git a/src/ensembl/production/metadata/grpc/servicer.py b/src/ensembl/production/metadata/grpc/servicer.py index c7375a5e..a6b979cb 100644 --- a/src/ensembl/production/metadata/grpc/servicer.py +++ b/src/ensembl/production/metadata/grpc/servicer.py @@ -9,88 +9,111 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging + from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc import ensembl.production.metadata.grpc.utils as utils +logger = logging.getLogger(__name__) + class EnsemblMetadataServicer(ensembl_metadata_pb2_grpc.EnsemblMetadataServicer): def __init__(self): self.db = utils.connect_to_db() def GetSpeciesInformation(self, request, context): + logger.info(f"Received RPC for GetSpeciesInformation with request: {request}") return utils.get_species_information(self.db, request.genome_uuid) def GetAssemblyInformation(self, request, context): + logger.info(f"Received RPC for GetAssemblyInformation with request: {request}") return utils.get_assembly_information(self.db, request.assembly_uuid) def GetGenomesByAssemblyAccessionID(self, request, context): + logger.info(f"Received RPC for GetGenomesByAssemblyAccessionID with request: {request}") return utils.get_genomes_from_assembly_accession_iterator( self.db, request.assembly_accession, request.release_version ) def GetSubSpeciesInformation(self, request, context): + logger.info(f"Received RPC for GetSubSpeciesInformation with request: {request}") return utils.get_sub_species_info(self.db, request.organism_uuid, request.group) def GetTopLevelStatistics(self, request, context): + logger.info(f"Received RPC for GetTopLevelStatistics with request: {request}") return utils.get_top_level_statistics(self.db, request.organism_uuid, request.group) def GetTopLevelStatisticsByUUID(self, request, context): + logger.info(f"Received RPC for GetTopLevelStatisticsByUUID with request: {request}") return utils.get_top_level_statistics_by_uuid(self.db, request.genome_uuid) def GetGenomeUUID(self, request, context): + logger.info(f"Received RPC for GetGenomeUUID with request: {request}") return utils.get_genome_uuid(self.db, request.ensembl_name, request.assembly_name, request.use_default) def GetGenomeByUUID(self, request, context): + logger.info(f"Received RPC for GetGenomeByUUID with request: {request}") return utils.get_genome_by_uuid(self.db, request.genome_uuid, request.release_version) def GetGenomesByKeyword(self, request, context): + logger.info(f"Received RPC for GetGenomesByKeyword with request: {request}") return utils.get_genomes_by_keyword_iterator( self.db, request.keyword, request.release_version ) def GetGenomeByName(self, request, context): + logger.info(f"Received RPC for GetGenomeByName with request: {request}") return utils.get_genome_by_name( self.db, request.ensembl_name, request.site_name, request.release_version ) def GetRelease(self, request, context): + logger.info(f"Received RPC for GetRelease with request: {request}") return utils.release_iterator( self.db, request.site_name, request.release_version, request.current_only ) def GetReleaseByUUID(self, request, context): + logger.info(f"Received RPC for GetReleaseByUUID with request: {request}") return utils.release_by_uuid_iterator(self.db, request.genome_uuid) def GetGenomeSequence(self, request, context): + logger.info(f"Received RPC for GetGenomeSequence with request: {request}") return utils.genome_sequence_iterator( self.db, request.genome_uuid, request.chromosomal_only ) def GetAssemblyRegion(self, request, context): + logger.info(f"Received RPC for GetAssemblyRegion with request: {request}") return utils.assembly_region_iterator( self.db, request.genome_uuid, request.chromosomal_only ) def GetGenomeAssemblySequenceRegion(self, request, context): + logger.info(f"Received RPC for GetGenomeAssemblySequenceRegion with request: {request}") return utils.genome_assembly_sequence_region( self.db, request.genome_uuid, request.sequence_region_name ) def GetDatasetsListByUUID(self, request, context): + logger.info(f"Received RPC for GetDatasetsListByUUID with request: {request}") return utils.get_datasets_list_by_uuid( self.db, request.genome_uuid, request.release_version ) def GetDatasetInformation(self, request, context): + logger.info(f"Received RPC for GetDatasetInformation with request: {request}") return utils.get_dataset_by_genome_and_dataset_type( self.db, request.genome_uuid, request.dataset_type ) def GetOrganismsGroupCount(self, request, context): + logger.info(f"Received RPC for GetOrganismsGroupCount with request: {request}") return utils.get_organisms_group_count( self.db, request.release_version ) def GetGenomeUUIDByTag(self, request, context): + logger.info(f"Received RPC for GetGenomeUUIDByTag with request: {request}") return utils.get_genome_uuid_by_tag(self.db, request.genome_tag) diff --git a/src/ensembl/production/metadata/grpc/utils.py b/src/ensembl/production/metadata/grpc/utils.py index 0c79a2ed..6732077a 100644 --- a/src/ensembl/production/metadata/grpc/utils.py +++ b/src/ensembl/production/metadata/grpc/utils.py @@ -10,12 +10,16 @@ # See the License for the specific language governing permissions and # limitations under the License. import itertools +import logging + from ensembl.production.metadata.grpc import ensembl_metadata_pb2 from ensembl.production.metadata.grpc.config import MetadataConfig as cfg from ensembl.production.metadata.grpc.adaptors.genome import GenomeAdaptor from ensembl.production.metadata.grpc.adaptors.release import ReleaseAdaptor import ensembl.production.metadata.grpc.protobuf_msg_factory as msg_factory +logger = logging.getLogger(__name__) + def connect_to_db(): conn = GenomeAdaptor( @@ -135,6 +139,7 @@ def get_genomes_from_assembly_accession_iterator(db_conn, assembly_accession, re return msg_factory.create_genome() + def get_species_information(db_conn, genome_uuid): if genome_uuid is None: return msg_factory.create_species() @@ -211,10 +216,10 @@ def get_genome_uuid(db_conn, ensembl_name, assembly_name, use_default=False): def get_genome_by_uuid(db_conn, genome_uuid, release_version): - if genome_uuid is None: + if genome_uuid is None or not genome_uuid: + logger.debug("Missing or Empty Genome UUID field.") return msg_factory.create_genome() - # We first get the genome info genome_results = db_conn.fetch_genomes( genome_uuid=genome_uuid, release_version=release_version, @@ -222,10 +227,16 @@ def get_genome_by_uuid(db_conn, genome_uuid, release_version): ) if len(genome_results) == 1: - return create_genome_with_attributes_and_count( + response_data = create_genome_with_attributes_and_count( db_conn=db_conn, genome=genome_results[0], release_version=release_version ) + logger.debug(f"Response data: \n{response_data}") + return response_data + elif len(genome_results) > 1: + logger.debug("Multiple results returned.") + else: + logger.debug("Genome not found.") return msg_factory.create_genome() From d4c06dc4865d65efcec41cf7eca982b2602973ed Mon Sep 17 00:00:00 2001 From: Bilal Date: Wed, 24 Jan 2024 18:16:10 +0000 Subject: [PATCH 2/5] add the rest of logging prompts --- .../metadata/grpc/ensembl_metadata_pb2.py | 5 +- .../production/metadata/grpc/service.py | 2 +- src/ensembl/production/metadata/grpc/utils.py | 144 +++++++++++++----- 3 files changed, 113 insertions(+), 38 deletions(-) diff --git a/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py b/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py index f17d2d39..b3fa5ddb 100755 --- a/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py +++ b/src/ensembl/production/metadata/grpc/ensembl_metadata_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: ensembl/production/metadata/grpc/ensembl_metadata.proto +# Protobuf Python Version: 4.25.0 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -20,8 +21,8 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ensembl.production.metadata.grpc.ensembl_metadata_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None - _DATASETS_DATASETSENTRY._options = None - _DATASETS_DATASETSENTRY._serialized_options = b'8\001' + _globals['_DATASETS_DATASETSENTRY']._options = None + _globals['_DATASETS_DATASETSENTRY']._serialized_options = b'8\001' _globals['_GENOME']._serialized_start=78 _globals['_GENOME']._serialized_end=393 _globals['_SPECIES']._serialized_start=396 diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index da21f6e4..7bf23e43 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) logging.basicConfig( - level=logging.INFO, + level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) diff --git a/src/ensembl/production/metadata/grpc/utils.py b/src/ensembl/production/metadata/grpc/utils.py index 6732077a..54d06d21 100644 --- a/src/ensembl/production/metadata/grpc/utils.py +++ b/src/ensembl/production/metadata/grpc/utils.py @@ -41,11 +41,14 @@ def get_alternative_names(db_conn, taxon_id): # remove duplicates unique_alternative_names = list(set(alternative_names)) # sort before returning (otherwise the test breaks) - return sorted(unique_alternative_names) + sorted_unique_alternative_names = sorted(unique_alternative_names) + logger.debug(sorted_unique_alternative_names) + return sorted_unique_alternative_names def get_top_level_statistics(db_conn, organism_uuid, group): - if organism_uuid is None: + if not organism_uuid: + logger.warning("Missing or Empty Organism UUID field.") return msg_factory.create_top_level_statistics() stats_results = db_conn.fetch_genome_datasets( @@ -56,16 +59,20 @@ def get_top_level_statistics(db_conn, organism_uuid, group): if len(stats_results) > 0: stats_by_genome_uuid = msg_factory.create_stats_by_genome_uuid(stats_results) - return msg_factory.create_top_level_statistics({ + response_data = msg_factory.create_top_level_statistics({ 'organism_uuid': organism_uuid, 'stats_by_genome_uuid': stats_by_genome_uuid }) + logger.debug(f"Response data: \n{response_data}") + return response_data + logger.debug("No top level stats found.") return msg_factory.create_top_level_statistics() def get_top_level_statistics_by_uuid(db_conn, genome_uuid): - if genome_uuid is None: + if not genome_uuid: + logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_top_level_statistics_by_uuid() stats_results = db_conn.fetch_genome_datasets( @@ -83,26 +90,34 @@ def get_top_level_statistics_by_uuid(db_conn, genome_uuid): 'statistic_type': result.Attribute.type, 'statistic_value': result.DatasetAttribute.value }) - return msg_factory.create_top_level_statistics_by_uuid( + + response_data = msg_factory.create_top_level_statistics_by_uuid( ({"genome_uuid": genome_uuid, "statistics": statistics}) ) + logger.debug(f"Response data: \n{response_data}") + return response_data + logger.debug("No top level stats found.") return msg_factory.create_top_level_statistics_by_uuid() def get_assembly_information(db_conn, assembly_uuid): - if assembly_uuid is None: + if not assembly_uuid: + logger.warning("Missing or Empty Assembly UUID field.") return msg_factory.create_assembly_info() assembly_results = db_conn.fetch_sequences( assembly_uuid=assembly_uuid ) if len(assembly_results) > 0: - return msg_factory.create_assembly_info(assembly_results[0]) + response_data = msg_factory.create_assembly_info(assembly_results[0]) + logger.debug(f"Response data: \n{response_data}") + return response_data + logger.debug("No assembly information was found.") return msg_factory.create_assembly_info() - +# TODO: move this function to protobuf_msg_factory.py file def create_genome_with_attributes_and_count(db_conn, genome, release_version): # we fetch attributes related to that genome attrib_data_results = db_conn.fetch_genome_datasets( @@ -127,21 +142,30 @@ def create_genome_with_attributes_and_count(db_conn, genome, release_version): def get_genomes_from_assembly_accession_iterator(db_conn, assembly_accession, release_version): - if assembly_accession is None: + if not assembly_accession: + logging.warning("Missing or Empty Assembly accession field.") return msg_factory.create_genome() - genome_results = db_conn.fetch_genomes( - assembly_accession=assembly_accession, - allow_unreleased=cfg.allow_unreleased - ) + # TODO: Add try except to the other functions as well + try: + genome_results = db_conn.fetch_genomes( + assembly_accession=assembly_accession, + allow_unreleased=cfg.allow_unreleased + ) + except Exception as e: + logging.error(f"Error fetching genomes: {e}") + raise + for genome in genome_results: + logging.debug(f"Processing genome: {genome.Genome.genome_uuid}") yield msg_factory.create_genome(data=genome) return msg_factory.create_genome() def get_species_information(db_conn, genome_uuid): - if genome_uuid is None: + if not genome_uuid: + logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_species() species_results = db_conn.fetch_genomes( @@ -151,13 +175,20 @@ def get_species_information(db_conn, genome_uuid): if len(species_results) == 1: tax_id = species_results[0].Organism.taxonomy_id taxo_results = db_conn.fetch_taxonomy_names(tax_id) - return msg_factory.create_species(species_results[0], taxo_results[tax_id]) + response_data = msg_factory.create_species(species_results[0], taxo_results[tax_id]) + logger.debug(f"Response data: \n{response_data}") + return response_data + elif len(species_results) > 1: + logger.debug("Multiple results returned.") + else: + logger.debug("Genome not found.") return msg_factory.create_species() def get_sub_species_info(db_conn, organism_uuid, group): - if organism_uuid is None: + if not organism_uuid: + logger.warning("Missing or Empty Organism UUID field.") return msg_factory.create_sub_species() sub_species_results = db_conn.fetch_genomes( @@ -175,17 +206,21 @@ def get_sub_species_info(db_conn, organism_uuid, group): if result.OrganismGroup.name not in species_name: species_name.append(result.OrganismGroup.name) - return msg_factory.create_sub_species({ + response_data = msg_factory.create_sub_species({ 'organism_uuid': organism_uuid, 'species_type': species_type, 'species_name': species_name }) + logger.debug(f"Response data: \n{response_data}") + return response_data + logger.debug("No sub-species information was found.") return msg_factory.create_sub_species() def get_genome_uuid(db_conn, ensembl_name, assembly_name, use_default=False): - if ensembl_name is None or assembly_name is None: + # TODO: I'll leave this one as it is because it will change.. I'll add logging to it later + if not ensembl_name: return msg_factory.create_genome_uuid() genome_uuid_result = db_conn.fetch_genomes( @@ -216,8 +251,8 @@ def get_genome_uuid(db_conn, ensembl_name, assembly_name, use_default=False): def get_genome_by_uuid(db_conn, genome_uuid, release_version): - if genome_uuid is None or not genome_uuid: - logger.debug("Missing or Empty Genome UUID field.") + if not genome_uuid: + logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_genome() genome_results = db_conn.fetch_genomes( @@ -242,6 +277,7 @@ def get_genome_by_uuid(db_conn, genome_uuid, release_version): def get_genomes_by_keyword_iterator(db_conn, keyword, release_version): if not keyword: + logger.warning("Missing or Empty Keyword field.") return msg_factory.create_genome() genome_results = db_conn.fetch_genome_by_keyword( @@ -262,13 +298,16 @@ def get_genomes_by_keyword_iterator(db_conn, keyword, release_version): most_recent_genomes.append(most_recent_genome) for genome_row in most_recent_genomes: + logging.debug(f"Processing genome: {genome_row.Genome.genome_uuid}") yield msg_factory.create_genome(data=genome_row) - return msg_factory.create_genome() + logger.debug("No genomes were found.") + return msg_factory.create_genome() def get_genome_by_name(db_conn, ensembl_name, site_name, release_version): - if ensembl_name is None and site_name is None: + if not ensembl_name and not site_name: + logger.warning("Missing or Empty ensembl_name and site_name field.") return msg_factory.create_genome() genome_results = db_conn.fetch_genomes( @@ -278,15 +317,22 @@ def get_genome_by_name(db_conn, ensembl_name, site_name, release_version): allow_unreleased=cfg.allow_unreleased ) if len(genome_results) == 1: - return create_genome_with_attributes_and_count( + response_data = create_genome_with_attributes_and_count( db_conn=db_conn, genome=genome_results[0], release_version=release_version ) + logger.debug(f"Response data: \n{response_data}") + return response_data + elif len(genome_results) > 1: + logger.debug("Multiple results returned.") + else: + logger.debug("Genome not found.") return msg_factory.create_genome() def get_datasets_list_by_uuid(db_conn, genome_uuid, release_version): - if genome_uuid is None: + if not genome_uuid: + logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_datasets() datasets_results = db_conn.fetch_genome_datasets( @@ -319,16 +365,20 @@ def get_datasets_list_by_uuid(db_conn, genome_uuid, release_version): dataset_infos=ds_obj_dict[dataset_type_key] ) - return msg_factory.create_datasets({ + response_data = msg_factory.create_datasets({ 'genome_uuid': genome_uuid, 'datasets': dataset_object_dict }) + logger.debug(f"Response data: \n{response_data}") + return response_data + logger.debug("No datasets found.") return msg_factory.create_datasets() def genome_sequence_iterator(db_conn, genome_uuid, chromosomal_only): - if genome_uuid is None: + if not genome_uuid: + logger.warning("Missing or Empty Genome UUID field.") return assembly_sequence_results = db_conn.fetch_sequences( @@ -336,11 +386,13 @@ def genome_sequence_iterator(db_conn, genome_uuid, chromosomal_only): chromosomal_only=chromosomal_only, ) for result in assembly_sequence_results: + logging.debug(f"Processing assembly: {result.AssemblySequence.name}") yield msg_factory.create_genome_sequence(result) def assembly_region_iterator(db_conn, genome_uuid, chromosomal_only): - if genome_uuid is None: + if not genome_uuid: + logger.warning("Missing or Empty Genome UUID field.") return assembly_sequence_results = db_conn.fetch_sequences( @@ -348,11 +400,13 @@ def assembly_region_iterator(db_conn, genome_uuid, chromosomal_only): chromosomal_only=chromosomal_only, ) for result in assembly_sequence_results: + logging.debug(f"Processing assembly: {result.AssemblySequence.name}") yield msg_factory.create_assembly_region(result) def genome_assembly_sequence_region(db_conn, genome_uuid, sequence_region_name): - if genome_uuid is None or sequence_region_name is None: + if not genome_uuid or not sequence_region_name: + logger.warning("Missing or Empty Genome UUID or Sequence region name field.") return msg_factory.create_genome_assembly_sequence_region() assembly_sequence_results = db_conn.fetch_sequences( @@ -360,8 +414,14 @@ def genome_assembly_sequence_region(db_conn, genome_uuid, sequence_region_name): assembly_sequence_name=sequence_region_name ) if len(assembly_sequence_results) == 1: - return msg_factory.create_genome_assembly_sequence_region(assembly_sequence_results[0]) + response_data = msg_factory.create_genome_assembly_sequence_region(assembly_sequence_results[0]) + logger.debug(f"Response data: \n{response_data}") + return response_data + elif len(assembly_sequence_results) > 1: + logger.debug("Multiple results returned.") + else: + logger.debug("Assembly sequence not found.") return msg_factory.create_genome_assembly_sequence_region() @@ -379,11 +439,12 @@ def release_iterator(metadata_db, site_name, release_version, current_only): ) for result in release_results: + logging.debug(f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") yield msg_factory.create_release(result) def release_by_uuid_iterator(metadata_db, genome_uuid): - if genome_uuid is None: + if not genome_uuid: return conn = ReleaseAdaptor(metadata_uri=cfg.metadata_uri) @@ -392,11 +453,13 @@ def release_by_uuid_iterator(metadata_db, genome_uuid): ) for result in release_results: + logging.debug(f"Processing release: {result.EnsemblRelease.version if hasattr(result, 'EnsemblRelease') else None}") yield msg_factory.create_release(result) def get_dataset_by_genome_and_dataset_type(db_conn, genome_uuid, requested_dataset_type): - if genome_uuid is None: + if not genome_uuid: + logger.warning("Missing or Empty Genome UUID field.") return msg_factory.create_dataset_infos() dataset_results = db_conn.fetch_genome_datasets( @@ -404,16 +467,20 @@ def get_dataset_by_genome_and_dataset_type(db_conn, genome_uuid, requested_datas dataset_type=requested_dataset_type, dataset_attributes=True ) - return msg_factory.create_dataset_infos(genome_uuid, requested_dataset_type, dataset_results) + response_data = msg_factory.create_dataset_infos(genome_uuid, requested_dataset_type, dataset_results) + logger.debug(f"Response data: \n{response_data}") + return response_data def get_organisms_group_count(db_conn, release_version): count_result = db_conn.fetch_organisms_group_counts(release_version=release_version) - return msg_factory.create_organisms_group_count(count_result, release_version) + response_data = msg_factory.create_organisms_group_count(count_result, release_version) + logger.debug(f"Response data: \n{response_data}") + return response_data def get_genome_uuid_by_tag(db_conn, genome_tag): - if genome_tag is None: + if not genome_tag: return msg_factory.create_genome_uuid() genome_uuid_result = db_conn.fetch_genomes( @@ -422,7 +489,14 @@ def get_genome_uuid_by_tag(db_conn, genome_tag): ) if len(genome_uuid_result) == 1: - return msg_factory.create_genome_uuid( + response_data = msg_factory.create_genome_uuid( {"genome_uuid": genome_uuid_result[0].Genome.genome_uuid} ) + logger.debug(f"Response data: \n{response_data}") + return response_data + + elif len(genome_uuid_result) > 1: + logger.debug("Multiple results returned.") + else: + logger.debug("No Genome UUID found.") return msg_factory.create_genome_uuid() From f0cd06ce6f3844e58ecf97cb10e2885948df7ce4 Mon Sep 17 00:00:00 2001 From: Bilal Date: Thu, 25 Jan 2024 10:58:40 +0000 Subject: [PATCH 3/5] turn DEBUG on and off using env var --- src/ensembl/production/metadata/grpc/config.py | 1 + src/ensembl/production/metadata/grpc/service.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ensembl/production/metadata/grpc/config.py b/src/ensembl/production/metadata/grpc/config.py index 306b71fb..a2831b5b 100644 --- a/src/ensembl/production/metadata/grpc/config.py +++ b/src/ensembl/production/metadata/grpc/config.py @@ -54,3 +54,4 @@ class MetadataConfig: max_overflow = os.environ.get("MAX_OVERFLOW", 0) pool_recycle = os.environ.get("POOL_RECYCLE", 50) allow_unreleased = parse_boolean_var(os.environ.get("ALLOW_UNRELEASED", False)) + debug_mode = parse_boolean_var(os.environ.get("DEBUG", False)) \ No newline at end of file diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index 7bf23e43..95d9abd1 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -13,12 +13,17 @@ import grpc import logging +from ensembl.production.metadata.grpc.config import MetadataConfig as cfg from ensembl.production.metadata.grpc import ensembl_metadata_pb2_grpc from ensembl.production.metadata.grpc.servicer import EnsemblMetadataServicer logger = logging.getLogger(__name__) + +# Determine the logging level based on the value of cfg.debug_mode +log_level = logging.DEBUG if cfg.debug_mode else logging.INFO + logging.basicConfig( - level=logging.DEBUG, + level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) @@ -40,4 +45,5 @@ def serve(): if __name__ == "__main__": logger.info("gRPC server starting on port 50051...") + logger.info(f"DEBUG: {cfg.debug_mode}") serve() From de156afc969677c787b55b8771fa6e5a6581be9a Mon Sep 17 00:00:00 2001 From: Bilal Date: Thu, 25 Jan 2024 17:55:27 +0000 Subject: [PATCH 4/5] set default logging level to WARNING instead of INFO --- src/ensembl/production/metadata/grpc/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ensembl/production/metadata/grpc/service.py b/src/ensembl/production/metadata/grpc/service.py index 95d9abd1..4c853211 100644 --- a/src/ensembl/production/metadata/grpc/service.py +++ b/src/ensembl/production/metadata/grpc/service.py @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) # Determine the logging level based on the value of cfg.debug_mode -log_level = logging.DEBUG if cfg.debug_mode else logging.INFO +log_level = logging.DEBUG if cfg.debug_mode else logging.WARNING logging.basicConfig( level=log_level, From eebc1f82c98139cec3d1e054504f2d9938c71a4d Mon Sep 17 00:00:00 2001 From: Bilal Date: Thu, 25 Jan 2024 17:58:02 +0000 Subject: [PATCH 5/5] changed logger.info to debug in servicer.py --- .../production/metadata/grpc/servicer.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/ensembl/production/metadata/grpc/servicer.py b/src/ensembl/production/metadata/grpc/servicer.py index 0e24800c..d31c1bc8 100644 --- a/src/ensembl/production/metadata/grpc/servicer.py +++ b/src/ensembl/production/metadata/grpc/servicer.py @@ -23,103 +23,103 @@ def __init__(self): self.db = utils.connect_to_db() def GetSpeciesInformation(self, request, context): - logger.info(f"Received RPC for GetSpeciesInformation with request: {request}") + logger.debug(f"Received RPC for GetSpeciesInformation with request: {request}") return utils.get_species_information(self.db, request.genome_uuid) def GetAssemblyInformation(self, request, context): - logger.info(f"Received RPC for GetAssemblyInformation with request: {request}") + logger.debug(f"Received RPC for GetAssemblyInformation with request: {request}") return utils.get_assembly_information(self.db, request.assembly_uuid) def GetGenomesByAssemblyAccessionID(self, request, context): - logger.info(f"Received RPC for GetGenomesByAssemblyAccessionID with request: {request}") + logger.debug(f"Received RPC for GetGenomesByAssemblyAccessionID with request: {request}") return utils.get_genomes_from_assembly_accession_iterator( self.db, request.assembly_accession, request.release_version ) def GetSubSpeciesInformation(self, request, context): - logger.info(f"Received RPC for GetSubSpeciesInformation with request: {request}") + logger.debug(f"Received RPC for GetSubSpeciesInformation with request: {request}") return utils.get_sub_species_info(self.db, request.organism_uuid, request.group) def GetTopLevelStatistics(self, request, context): - logger.info(f"Received RPC for GetTopLevelStatistics with request: {request}") + logger.debug(f"Received RPC for GetTopLevelStatistics with request: {request}") return utils.get_top_level_statistics(self.db, request.organism_uuid, request.group) def GetTopLevelStatisticsByUUID(self, request, context): - logger.info(f"Received RPC for GetTopLevelStatisticsByUUID with request: {request}") + logger.debug(f"Received RPC for GetTopLevelStatisticsByUUID with request: {request}") return utils.get_top_level_statistics_by_uuid(self.db, request.genome_uuid) def GetGenomeUUID(self, request, context): - logger.info(f"Received RPC for GetGenomeUUID with request: {request}") + logger.debug(f"Received RPC for GetGenomeUUID with request: {request}") return utils.get_genome_uuid(self.db, request.production_name, request.assembly_name, request.use_default) def GetGenomeByUUID(self, request, context): - logger.info(f"Received RPC for GetGenomeByUUID with request: {request}") + logger.debug(f"Received RPC for GetGenomeByUUID with request: {request}") return utils.get_genome_by_uuid(self.db, request.genome_uuid, request.release_version) def GetGenomesByKeyword(self, request, context): - logger.info(f"Received RPC for GetGenomesByKeyword with request: {request}") + logger.debug(f"Received RPC for GetGenomesByKeyword with request: {request}") return utils.get_genomes_by_keyword_iterator( self.db, request.keyword, request.release_version ) def GetGenomeByName(self, request, context): - logger.info(f"Received RPC for GetGenomeByName with request: {request}") + logger.debug(f"Received RPC for GetGenomeByName with request: {request}") return utils.get_genome_by_name( self.db, request.ensembl_name, request.site_name, request.release_version ) def GetRelease(self, request, context): - logger.info(f"Received RPC for GetRelease with request: {request}") + logger.debug(f"Received RPC for GetRelease with request: {request}") return utils.release_iterator( self.db, request.site_name, request.release_version, request.current_only ) def GetReleaseByUUID(self, request, context): - logger.info(f"Received RPC for GetReleaseByUUID with request: {request}") + logger.debug(f"Received RPC for GetReleaseByUUID with request: {request}") return utils.release_by_uuid_iterator(self.db, request.genome_uuid) def GetGenomeSequence(self, request, context): - logger.info(f"Received RPC for GetGenomeSequence with request: {request}") + logger.debug(f"Received RPC for GetGenomeSequence with request: {request}") return utils.genome_sequence_iterator( self.db, request.genome_uuid, request.chromosomal_only ) def GetAssemblyRegion(self, request, context): - logger.info(f"Received RPC for GetAssemblyRegion with request: {request}") + logger.debug(f"Received RPC for GetAssemblyRegion with request: {request}") return utils.assembly_region_iterator( self.db, request.genome_uuid, request.chromosomal_only ) def GetGenomeAssemblySequenceRegion(self, request, context): - logger.info(f"Received RPC for GetGenomeAssemblySequenceRegion with request: {request}") + logger.debug(f"Received RPC for GetGenomeAssemblySequenceRegion with request: {request}") return utils.genome_assembly_sequence_region( self.db, request.genome_uuid, request.sequence_region_name ) def GetDatasetsListByUUID(self, request, context): - logger.info(f"Received RPC for GetDatasetsListByUUID with request: {request}") + logger.debug(f"Received RPC for GetDatasetsListByUUID with request: {request}") return utils.get_datasets_list_by_uuid( self.db, request.genome_uuid, request.release_version ) def GetDatasetInformation(self, request, context): - logger.info(f"Received RPC for GetDatasetInformation with request: {request}") + logger.debug(f"Received RPC for GetDatasetInformation with request: {request}") return utils.get_dataset_by_genome_and_dataset_type( self.db, request.genome_uuid, request.dataset_type ) def GetOrganismsGroupCount(self, request, context): - logger.info(f"Received RPC for GetOrganismsGroupCount with request: {request}") + logger.debug(f"Received RPC for GetOrganismsGroupCount with request: {request}") return utils.get_organisms_group_count( self.db, request.release_version ) def GetGenomeUUIDByTag(self, request, context): - logger.info(f"Received RPC for GetGenomeUUIDByTag with request: {request}") + logger.debug(f"Received RPC for GetGenomeUUIDByTag with request: {request}") return utils.get_genome_uuid_by_tag(self.db, request.genome_tag) def GetReleaseVersionByUUID(self, request, context): - logger.info(f"Received RPC for GetReleaseVersionByUUID with request: {request}") + logger.debug(f"Received RPC for GetReleaseVersionByUUID with request: {request}") return utils.get_release_version_by_uuid( self.db, request.genome_uuid, request.dataset_type, request.release_version )