|
2 | 2 |
|
3 | 3 | from log import Log |
4 | 4 |
|
5 | | -kaggle_proxy_token = os.getenv("KAGGLE_DATA_PROXY_TOKEN") |
6 | | -kernel_integrations_var = os.getenv("KAGGLE_KERNEL_INTEGRATIONS") |
7 | | - |
8 | | -def init(): |
9 | | - is_jwe_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ |
10 | | - if kaggle_proxy_token or is_jwe_set: |
11 | | - init_bigquery() |
12 | | - if is_jwe_set: |
13 | | - from kaggle_gcp import get_integrations |
14 | | - if get_integrations().has_gcs(): |
15 | | - init_gcs() |
16 | | - |
17 | | - |
18 | | -def init_bigquery(): |
19 | | - from google.auth import environment_vars |
20 | | - from google.cloud import bigquery |
21 | | - # TODO: Update this to the correct kaggle.gcp path once we no longer inject modules |
22 | | - # from the worker. |
23 | | - from kaggle_gcp import get_integrations, PublicBigqueryClient, KaggleKernelCredentials |
24 | | - |
25 | | - # If this Kernel has bigquery integration on startup, preload the Kaggle Credentials |
26 | | - # object for magics to work. |
27 | | - if get_integrations().has_bigquery(): |
28 | | - from google.cloud.bigquery import magics |
29 | | - magics.context.credentials = KaggleKernelCredentials() |
30 | | - |
31 | | - def monkeypatch_bq(bq_client, *args, **kwargs): |
32 | | - specified_credentials = kwargs.get('credentials') |
33 | | - has_bigquery = get_integrations().has_bigquery() |
34 | | - # Prioritize passed in project id, but if it is missing look for env var. |
35 | | - arg_project = kwargs.get('project') |
36 | | - explicit_project_id = arg_project or os.environ.get(environment_vars.PROJECT) |
37 | | - # This is a hack to get around the bug in google-cloud library. |
38 | | - # Remove these two lines once this is resolved: |
39 | | - # https://github.com/googleapis/google-cloud-python/issues/8108 |
40 | | - if explicit_project_id: |
41 | | - Log.info(f"Explicit project set to {explicit_project_id}") |
42 | | - kwargs['project'] = explicit_project_id |
43 | | - if explicit_project_id is None and specified_credentials is None and not has_bigquery: |
44 | | - msg = "Using Kaggle's public dataset BigQuery integration." |
45 | | - Log.info(msg) |
46 | | - print(msg) |
47 | | - return PublicBigqueryClient(*args, **kwargs) |
48 | | - |
49 | | - else: |
50 | | - if specified_credentials is None: |
51 | | - Log.info("No credentials specified, using KaggleKernelCredentials.") |
52 | | - kwargs['credentials'] = KaggleKernelCredentials() |
53 | | - if (not has_bigquery): |
54 | | - Log.info("No bigquery integration found, creating client anyways.") |
55 | | - print('Please ensure you have selected a BigQuery ' |
56 | | - 'account in the Kernels Settings sidebar.') |
57 | | - return bq_client(*args, **kwargs) |
58 | | - |
59 | | - # Monkey patches BigQuery client creation to use proxy or user-connected GCP account. |
60 | | - # Deprecated in favor of Kaggle.DataProxyClient(). |
61 | | - # TODO: Remove this once uses have migrated to that new interface. |
62 | | - bq_client = bigquery.Client |
63 | | - bigquery.Client = lambda *args, **kwargs: monkeypatch_bq( |
64 | | - bq_client, *args, **kwargs) |
65 | | - |
66 | | - |
67 | | -def init_gcs(): |
68 | | - from kaggle_secrets import GcpTarget |
69 | | - from kaggle_gcp import KaggleKernelCredentials |
70 | | - from google.cloud import storage |
71 | | - def monkeypatch_gcs(gcs_client, *args, **kwargs): |
72 | | - specified_credentials = kwargs.get('credentials') |
73 | | - if specified_credentials is None: |
74 | | - Log.info("No credentials specified, using KaggleKernelCredentials.") |
75 | | - kwargs['credentials'] = KaggleKernelCredentials(target=GcpTarget.GCS) |
76 | | - return gcs_client(*args, **kwargs) |
77 | | - |
78 | | - gcs_client = storage.Client |
79 | | - storage.Client = lambda *args, **kwargs: monkeypatch_gcs(gcs_client, *args, **kwargs) |
80 | | - |
81 | | - |
82 | | -init() |
import importlib
import importlib.abc
import importlib.machinery
import sys
class GcpModuleFinder(importlib.abc.MetaPathFinder):
    """Meta-path finder that intercepts imports of GCP client libraries.

    Intercepted modules are loaded through :class:`GcpModuleLoader`, which
    returns a monkeypatched version wired up with Kaggle credentials.
    """

    # Fully-qualified names of the modules we intercept.
    _MODULES = ['google.cloud.bigquery', 'google.cloud.storage']
    # Basename of the module that must import the *real* libraries.
    _KAGGLE_GCP_PATH = 'kaggle_gcp.py'

    def _is_called_from_kaggle_gcp(self):
        """Return True if kaggle_gcp.py appears anywhere on the call stack."""
        import inspect
        # inspect.stack(0) skips reading source context lines for each
        # frame, which is noticeably cheaper — this runs on every
        # intercepted import.
        for frame_info in inspect.stack(0):
            if os.path.basename(frame_info.filename) == self._KAGGLE_GCP_PATH:
                return True
        return False

    def find_spec(self, fullname, path, target=None):
        """Return a ModuleSpec using our loader for intercepted modules.

        Returns None — deferring to the normal import machinery — for
        modules we do not patch, or when the import originates from
        kaggle_gcp itself: that module must load the real library, and
        serving the patched one would create a circular dependency
        (we call into kaggle_gcp to build the patched module).
        """
        if fullname not in self._MODULES:
            return None
        if self._is_called_from_kaggle_gcp():
            return None
        return importlib.machinery.ModuleSpec(fullname, GcpModuleLoader())
| 30 | + |
| 31 | + |
class GcpModuleLoader(importlib.abc.Loader):
    """Loader that builds monkeypatched GCP modules via kaggle_gcp hooks."""

    def create_module(self, spec):
        """Construct and return the patched module for *spec*.

        Delegates to the kaggle_gcp initializer matching the requested
        module name; the initializer returns the already-patched module.
        """
        import kaggle_gcp
        initializers = {
            'google.cloud.bigquery': kaggle_gcp.init_bigquery,
            'google.cloud.storage': kaggle_gcp.init_gcs,
        }
        initialize = initializers[spec.name]
        return initialize()

    def exec_module(self, module):
        # The module is fully built by create_module; nothing to execute.
        pass
| 49 | + |
| 50 | + |
# Install the finder at the front of the import machinery so it wins over
# the default finders. Skipped in frozen builds (e.g. PyInstaller), where
# the import system is bundled and monkeypatching imports is unsafe.
if not hasattr(sys, 'frozen'):
    _finder = GcpModuleFinder()
    sys.meta_path.insert(0, _finder)
0 commit comments