Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't create run IDs if dependencies for the data store are missing, i.e., fail fast #1666

Merged
merged 5 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion metaflow/plugins/azure/azure_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ class MetaflowAzureResourceError(MetaflowException):


class MetaflowAzurePackageError(MetaflowException):
headline = "Missing required packages azure-identity and azure-storage-blob"
headline = "Missing required packages 'azure-identity' and 'azure-storage-blob'"
3 changes: 1 addition & 2 deletions metaflow/plugins/datastores/azure_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,8 @@ def list_content_single(self, path):
class AzureStorage(DataStoreStorage):
TYPE = "azure"

@check_azure_deps
def __init__(self, root=None):
# cannot decorate __init__... invoke it with dummy decoratee
check_azure_deps(lambda: 0)
super(AzureStorage, self).__init__(root)
self._tmproot = ARTIFACT_LOCALROOT
self._default_scope_token = None
Expand Down
3 changes: 1 addition & 2 deletions metaflow/plugins/datastores/gs_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,8 @@ def load_bytes_single(self, tmpdir, key):
class GSStorage(DataStoreStorage):
TYPE = "gs"

@check_gs_deps
def __init__(self, root=None):
# cannot decorate __init__... invoke it with dummy decoratee
check_gs_deps(lambda: 0)
super(GSStorage, self).__init__(root)
self._tmproot = ARTIFACT_LOCALROOT
self._root_client = None
Expand Down
3 changes: 2 additions & 1 deletion metaflow/plugins/datastores/s3_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from itertools import starmap

from metaflow.plugins.datatools.s3.s3 import S3, S3Client, S3PutObject
from metaflow.plugins.datatools.s3.s3 import S3, S3Client, S3PutObject, check_s3_deps
from metaflow.metaflow_config import DATASTORE_SYSROOT_S3, ARTIFACT_LOCALROOT
from metaflow.datastore.datastore_storage import CloseAfterUse, DataStoreStorage

Expand All @@ -18,6 +18,7 @@
class S3Storage(DataStoreStorage):
TYPE = "s3"

@check_s3_deps
def __init__(self, root=None):
super(S3Storage, self).__init__(root)
self.s3_client = S3Client()
Expand Down
35 changes: 24 additions & 11 deletions metaflow/plugins/datatools/s3/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,25 @@
if TYPE_CHECKING:
from metaflow.client import Run

try:
import boto3
from boto3.s3.transfer import TransferConfig

DOWNLOAD_FILE_THRESHOLD = 2 * TransferConfig().multipart_threshold
DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
boto_found = True
except:
boto_found = False
def _check_and_init_s3_deps():
    """Verify that the S3 storage dependencies are importable.

    Raises:
        MetaflowException: if ``boto3`` (or its transfer submodule) is not
            installed, so callers can fail fast instead of discovering the
            missing dependency mid-run.
    """
    try:
        # Imported only to confirm availability; the names are intentionally
        # unused here.
        import boto3
        from boto3.s3.transfer import TransferConfig
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so a single
        # clause covers both; chain the original error to aid debugging.
        raise MetaflowException(
            "You need to install 'boto3' in order to use S3."
        ) from e


def check_s3_deps(func):
    """Decorator that verifies S3 dependencies before invoking ``func``.

    The check (currently: ``boto3`` is importable) runs on every call of the
    decorated function and raises ``MetaflowException`` early if a dependency
    is missing — i.e. fail fast, mirroring ``check_gs_deps``/``check_azure_deps``.
    """
    import functools

    # functools.wraps preserves the wrapped function's __name__/__doc__ and
    # signature metadata, which matters for decorated public APIs.
    @functools.wraps(func)
    def _inner_func(*args, **kwargs):
        _check_and_init_s3_deps()
        return func(*args, **kwargs)

    return _inner_func


TEST_INJECT_RETRYABLE_FAILURES = int(
Expand Down Expand Up @@ -498,6 +508,7 @@ class S3(object):
def get_root_from_config(cls, echo, create_on_absent=True):
    """Return the configured S3 datatools root (``DATATOOLS_S3ROOT``)."""
    # `echo` and `create_on_absent` are unused here; presumably kept so the
    # signature matches other datatools backends -- TODO confirm against
    # sibling implementations.
    return DATATOOLS_S3ROOT

@check_s3_deps
def __init__(
self,
tmproot: str = TEMPDIR,
Expand All @@ -508,9 +519,6 @@ def __init__(
encryption: Optional[str] = S3_SERVER_SIDE_ENCRYPTION,
**kwargs
):
if not boto_found:
raise MetaflowException("You need to install 'boto3' in order to use S3.")

if run:
# 1. use a (current) run ID with optional customizations
if DATATOOLS_S3ROOT is None:
Expand Down Expand Up @@ -875,6 +883,11 @@ def get(
`S3Object`
An S3Object corresponding to the object requested.
"""
from boto3.s3.transfer import TransferConfig

DOWNLOAD_FILE_THRESHOLD = 2 * TransferConfig().multipart_threshold
DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1

url, r = self._url_and_range(key)
src = urlparse(url)

Expand Down
2 changes: 1 addition & 1 deletion metaflow/plugins/gcp/gs_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@


class MetaflowGSPackageError(MetaflowException):
headline = "Missing required packages google-cloud-storage google-auth"
headline = "Missing required packages 'google-cloud-storage' and 'google-auth'"
2 changes: 1 addition & 1 deletion metaflow/plugins/gcp/gs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _check_and_init_gs_deps():


def check_gs_deps(func):
"""The decorated function checks GS dependencies (as needed for Azure storage backend). This includes
"""The decorated function checks GS dependencies (as needed for Google Cloud storage backend). This includes
various GCP SDK packages, as well as a Python version of >=3.7
"""

Expand Down