diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index 20acac5c..ba382531 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -164,7 +164,6 @@ ADD patches/kaggle_gcp.py \
     patches/kaggle_session.py \
     patches/kaggle_web_client.py \
     patches/kaggle_datasets.py \
-    patches/log.py \
     $PACKAGE_PATH/
 
 # Figure out why this is in a different place?
diff --git a/patches/kaggle_gcp.py b/patches/kaggle_gcp.py
index 64a4611f..4cb98858 100644
--- a/patches/kaggle_gcp.py
+++ b/patches/kaggle_gcp.py
@@ -1,5 +1,6 @@
 import os
 import inspect
+import logging
 from google.auth import credentials, environment_vars
 from google.auth.exceptions import RefreshError
 from google.api_core.gapic_v1.client_info import ClientInfo
@@ -8,8 +9,6 @@
 from google.cloud.bigquery._http import Connection
 from kaggle_secrets import GcpTarget, UserSecretsClient
 
-from log import Log
-
 KAGGLE_GCP_CLIENT_USER_AGENT="kaggle-gcp-client/1.0"
 
 def get_integrations():
@@ -22,7 +21,7 @@ def get_integrations():
             target = GcpTarget[integration.upper()]
             kernel_integrations.add_integration(target)
         except KeyError as e:
-            Log.error(f"Unknown integration target: {integration.upper()}")
+            logging.debug(f"Unknown integration target: {integration.upper()}")
     return kernel_integrations
 
 
@@ -66,14 +65,14 @@ def refresh(self, request):
             elif self.target == GcpTarget.CLOUDAI:
                 self.token, self.expiry = client._get_cloudai_access_token()
         except ConnectionError as e:
-            Log.error(f"Connection error trying to refresh access token: {e}")
+            logging.error(f"Connection error trying to refresh access token: {e}")
             print("There was a connection error trying to fetch the access token. "
                   f"Please ensure internet is on in order to use the {self.target.service} Integration.")
             raise RefreshError('Unable to refresh access token due to connection error.') from e
         except Exception as e:
-            Log.error(f"Error trying to refresh access token: {e}")
+            logging.error(f"Error trying to refresh access token: {e}")
             if (not get_integrations().has_integration(self.target)):
-                Log.error(f"No {self.target.service} integration found.")
+                logging.error(f"No {self.target.service} integration found.")
                 print(
                     f"Please ensure you have selected a {self.target.service} account in the Notebook Add-ons menu.")
             raise RefreshError('Unable to refresh access token.') from e
@@ -102,7 +101,7 @@ def api_request(self, *args, **kwargs):
             msg = ("Permission denied using Kaggle's public BigQuery integration. "
                    "Did you mean to select a BigQuery account in the Notebook Add-ons menu?")
             print(msg)
-            Log.info(msg)
+            logging.info(msg)
             raise e
 
 
@@ -156,23 +155,23 @@ def monkeypatch_bq(bq_client, *args, **kwargs):
     # Remove these two lines once this is resolved:
     # https://github.com/googleapis/google-cloud-python/issues/8108
     if explicit_project_id:
-        Log.info(f"Explicit project set to {explicit_project_id}")
+        logging.info(f"Explicit project set to {explicit_project_id}")
         kwargs['project'] = explicit_project_id
     if explicit_project_id is None and specified_credentials is None and not has_bigquery:
         msg = "Using Kaggle's public dataset BigQuery integration."
-        Log.info(msg)
+        logging.info(msg)
         print(msg)
         return PublicBigqueryClient(*args, **kwargs)
     else:
         if specified_credentials is None:
-            Log.info("No credentials specified, using KaggleKernelCredentials.")
+            logging.info("No credentials specified, using KaggleKernelCredentials.")
             kwargs['credentials'] = KaggleKernelCredentials()
             if (not has_bigquery):
-                Log.info("No bigquery integration found, creating client anyways.")
+                logging.info("No bigquery integration found, creating client anyways.")
                 print('Please ensure you have selected a BigQuery '
                       'account in the Notebook Add-ons menu.')
         if explicit_project_id is None:
-            Log.info("No project specified while using the unmodified client.")
+            logging.info("No project specified while using the unmodified client.")
            print('Please ensure you specify a project id when creating the client'
                  ' in order to use your BigQuery account.')
         kwargs['client_info'] = set_kaggle_user_agent(kwargs.get('client_info'))
@@ -196,20 +195,20 @@ def monkeypatch_aiplatform_init(aiplatform_klass, kaggle_kernel_credentials):
     def patched_init(*args, **kwargs):
         specified_credentials = kwargs.get('credentials')
         if specified_credentials is None:
-            Log.info("No credentials specified, using KaggleKernelCredentials.")
+            logging.info("No credentials specified, using KaggleKernelCredentials.")
             kwargs['credentials'] = kaggle_kernel_credentials
         return aiplatform_init(*args, **kwargs)
 
     if (not has_been_monkeypatched(aiplatform_klass.init)):
         aiplatform_klass.init = patched_init
-        Log.info("aiplatform.init patched")
+        logging.info("aiplatform.init patched")
 
 def monkeypatch_client(client_klass, kaggle_kernel_credentials):
     client_init = client_klass.__init__
     def patched_init(self, *args, **kwargs):
         specified_credentials = kwargs.get('credentials')
         if specified_credentials is None:
-            Log.info("No credentials specified, using KaggleKernelCredentials.")
+            logging.info("No credentials specified, using KaggleKernelCredentials.")
             # Some GCP services demand the billing and target project must be the same.
             # To avoid using default service account based credential as caller credential
             # user need to provide ClientOptions with quota_project_id:
@@ -227,7 +226,7 @@ def patched_init(self, *args, **kwargs):
 
     if (not has_been_monkeypatched(client_klass.__init__)):
         client_klass.__init__ = patched_init
-        Log.info(f"Client patched: {client_klass}")
+        logging.info(f"Client patched: {client_klass}")
 
 def set_kaggle_user_agent(client_info: ClientInfo):
     # Add kaggle client user agent in order to attribute usage.
@@ -360,4 +359,4 @@ def init():
 # google.cloud.* and kaggle_gcp. By calling init here, we guarantee
 # that regardless of the original import that caused google.cloud.* to be
 # loaded, the monkeypatching will be done.
-init()
+init()
\ No newline at end of file
diff --git a/patches/log.py b/patches/log.py
deleted file mode 100644
index 59a07c8c..00000000
--- a/patches/log.py
+++ /dev/null
@@ -1,133 +0,0 @@
-import io
-import logging
-import os
-
-import google.auth
-
-
-_LOG_TO_FILE_ENV = os.getenv("KAGGLE_LOG_TO_FILE")
-
-
-class _LogFormatter(logging.Formatter):
-    """A logging formatter which truncates long messages."""
-
-    _MAX_LOG_LENGTH = 10000  # Be generous, not to truncate long backtraces.
-
-    def format(self, record):
-        msg = super(_LogFormatter, self).format(record)
-        return msg[:_LogFormatter._MAX_LOG_LENGTH] if msg else msg
-
-# TODO(vimota): Clean this up once we're using python 3.8 and can use
-# (https://github.com/python/cpython/commit/dde9fdbe453925279ac3d2a6a72102f6f9ef247c)
-# Right now, making the logging module display the intended frame's information
-# when the logging calls (info, warn, ...) are wrapped (as is the case in our
-# Log class) involves fragile logic.
-class _Logger(logging.Logger):
-
-    # This is a copy of logging.Logger.findCaller with the filename ignore
-    # set expanded to include the current filename (".../log.py").
-    # Copyright 2001-2015 by Vinay Sajip. All Rights Reserved.
-    # License: https://github.com/python/cpython/blob/ce9e62544571e7ade7186697d5dd065fb4c5243f/LICENSE
-    def findCaller(self, stack_info=False, stacklevel=1):
-        f = logging.currentframe()
-        f = f.f_back
-        rv = "(unknown file)", 0, "(unknown function)", None
-        while hasattr(f, "f_code"):
-            co = f.f_code
-            filename = os.path.normcase(co.co_filename)
-            if filename in _ignore_srcfiles:
-                f = f.f_back
-                continue
-            sinfo = None
-            if stack_info:
-                sio = io.StringIO()
-                sio.write('Stack (most recent call last):\n')
-                traceback.print_stack(f, file=sio)
-                sinfo = sio.getvalue()
-                if sinfo[-1] == '\n':
-                    sinfo = sinfo[:-1]
-                sio.close()
-            rv = (co.co_filename, f.f_lineno, co.co_name, sinfo)
-            break
-        return rv
-
-
-_srcfile = os.path.normcase(_Logger.findCaller.__code__.co_filename)
-_ignore_srcfiles = (_srcfile, logging._srcfile)
-
-class Log:
-    """ Helper aggregate for all things related to logging activity. """
-
-    _GLOBAL_LOG = logging.getLogger("")
-    _initialized = False
-
-    # These are convenience helpers. For performance, consider saving Log.get_logger() and using that
-    @staticmethod
-    def critical(msg, *args, **kwargs):
-        Log._GLOBAL_LOG.critical(msg, *args, **kwargs)
-
-    @staticmethod
-    def fatal(msg, *args, **kwargs):
-        Log._GLOBAL_LOG.fatal(msg, *args, **kwargs)
-
-    @staticmethod
-    def exception(msg, *args, **kwargs):
-        Log._GLOBAL_LOG.exception(msg, *args, **kwargs)
-
-    @staticmethod
-    def error(msg, *args, **kwargs):
-        Log._GLOBAL_LOG.error(msg, *args, **kwargs)
-
-    @staticmethod
-    def warn(msg, *args, **kwargs):
-        Log._GLOBAL_LOG.warn(msg, *args, **kwargs)
-
-    @staticmethod
-    def warning(msg, *args, **kwargs):
-        Log._GLOBAL_LOG.warning(msg, *args, **kwargs)
-
-    @staticmethod
-    def debug(msg, *args, **kwargs):
-        Log._GLOBAL_LOG.debug(msg, *args, **kwargs)
-
-    @staticmethod
-    def info(msg, *args, **kwargs):
-        Log._GLOBAL_LOG.info(msg, *args, **kwargs)
-
-    @staticmethod
-    def set_level(loglevel):
-        if isinstance(loglevel, int):
-            Log._GLOBAL_LOG.setLevel(loglevel)
-            return
-        elif isinstance(loglevel, str):
-            # idea from https://docs.python.org/3.5/howto/logging.html#logging-to-a-file
-            numeric_level = getattr(logging, loglevel.upper(), None)
-            if isinstance(numeric_level, int):
-                Log._GLOBAL_LOG.setLevel(numeric_level)
-                return
-
-        raise ValueError('Invalid log level: %s' % loglevel)
-
-    @staticmethod
-    def _static_init():
-        if Log._initialized:
-            return
-
-        logging.setLoggerClass(_Logger)
-        # The root logger's type is unfortunately (and surprisingly) not affected by
-        # `setLoggerClass`. Monkey patch it instead. TODO(vimota): Remove this, see the TODO
-        # associated with _Logger.
-        logging.RootLogger.findCaller = _Logger.findCaller
-        log_to_file = _LOG_TO_FILE_ENV.lower() in ("yes", "true", "t", "1") if _LOG_TO_FILE_ENV is not None else True
-        if log_to_file:
-            handler = logging.FileHandler(filename='/tmp/kaggle.log', mode='w')
-        else:
-            handler = logging.StreamHandler()
-
-        # ".1s" is for the first letter: http://stackoverflow.com/a/27453084/1869.
-        format_string = "%(asctime)s %(levelname).1s %(process)d %(filename)s:%(lineno)d] %(message)s"
-        handler.setFormatter(_LogFormatter(format_string))
-        logging.basicConfig(level=logging.INFO, handlers=[handler])
-        Log._initialized = True
-
-Log._static_init()
diff --git a/patches/sitecustomize.py b/patches/sitecustomize.py
index b8ae0692..1bb8a1b6 100644
--- a/patches/sitecustomize.py
+++ b/patches/sitecustomize.py
@@ -1,7 +1,6 @@
+import logging
 import os
 
-from log import Log
-
 import sys
 import importlib.abc
 import importlib
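With patches/log.py removed, the patched modules log through the standard-library root logger, and logging.info()/logging.debug() calls produce no output until a handler is configured (Python's last-resort handler only emits WARNING and above). The snippet below is a minimal sketch, not part of this diff, of a root-logger setup that mirrors the deleted patches/log.py defaults; the /tmp/kaggle.log path and the format string are copied from that file, and where such configuration lives in the image is an assumption.

    # Minimal sketch (assumption, not part of this diff): configure the stdlib root
    # logger so the logging.info()/logging.error() calls introduced above are emitted.
    # Handler target and format string mirror the deleted patches/log.py defaults.
    import logging

    handler = logging.FileHandler(filename='/tmp/kaggle.log', mode='w')
    handler.setFormatter(logging.Formatter(
        "%(asctime)s %(levelname).1s %(process)d %(filename)s:%(lineno)d] %(message)s"))
    logging.basicConfig(level=logging.INFO, handlers=[handler])

    logging.info("Using Kaggle's public dataset BigQuery integration.")  # example record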