Skip to content

Commit

Permalink
feat(dataset): fail early when external storage not installed (#1239)
Browse files Browse the repository at this point in the history
* feat(dataset): fail early when external storage not installed
  • Loading branch information
m-alisafaee committed May 14, 2020
1 parent e8f1a8b commit e6ea6da
Show file tree
Hide file tree
Showing 15 changed files with 102 additions and 60 deletions.
8 changes: 4 additions & 4 deletions renku/cli/__init__.py
Expand Up @@ -92,7 +92,7 @@
from renku.core.commands.echo import WARNING
from renku.core.commands.migrate import check_for_migration
from renku.core.commands.options import install_completion, \
option_use_external_storage
option_external_storage_requested
from renku.core.commands.version import check_version, print_version
from renku.core.errors import UsageError
from renku.core.management.client import LocalClient
Expand Down Expand Up @@ -175,7 +175,7 @@ def is_allowed_command(ctx):
default=RENKU_HOME,
help='Location of the Renku directory.'
)
@option_use_external_storage
@option_external_storage_requested
@click.option(
'--disable-version-check',
envvar='RENKU_DISABLE_VERSION_CHECK',
Expand All @@ -186,7 +186,7 @@ def is_allowed_command(ctx):
help='Do not periodically check PyPI for a new version of renku.',
)
@click.pass_context
def cli(ctx, path, renku_home, use_external_storage):
def cli(ctx, path, renku_home, external_storage_requested):
"""Check common Renku commands used in various situations."""
renku_path = Path(path) / renku_home
if not renku_path.exists() and not is_allowed_command(ctx):
Expand All @@ -199,7 +199,7 @@ def cli(ctx, path, renku_home, use_external_storage):
ctx.obj = LocalClient(
path=path,
renku_home=renku_home,
use_external_storage=use_external_storage,
external_storage_requested=external_storage_requested,
)

if (
Expand Down
17 changes: 10 additions & 7 deletions renku/cli/init.py
Expand Up @@ -147,7 +147,7 @@
from renku.core.commands.git import set_git_home
from renku.core.commands.init import create_from_template, fetch_template, \
read_template_manifest
from renku.core.commands.options import option_use_external_storage
from renku.core.commands.options import option_external_storage_requested
from renku.core.models.tabulate import tabulate

_GITLAB_CI = '.gitlab-ci.yml'
Expand Down Expand Up @@ -293,12 +293,13 @@ def check_git_user_config():
help='List templates available in the template-source.'
)
@click.option('--force', is_flag=True, help='Override target path.')
@option_use_external_storage
@option_external_storage_requested
@pass_local_client
@click.pass_context
def init(
ctx, client, use_external_storage, path, name, template_id, template_index,
template_source, template_ref, parameter, list_templates, force
ctx, client, external_storage_requested, path, name, template_id,
template_index, template_source, template_ref, parameter, list_templates,
force
):
"""Initialize a project in PATH. Default is current path."""
# verify dirty path
Expand Down Expand Up @@ -389,10 +390,12 @@ def init(

# set local path and storage
store_directory(path)
if not client.use_external_storage:
use_external_storage = False
if not client.external_storage_requested:
external_storage_requested = False
ctx.obj = client = attr.evolve(
client, path=path, use_external_storage=use_external_storage
client,
path=path,
external_storage_requested=external_storage_requested
)
if not is_path_empty(path):
from git import GitCommandError
Expand Down
24 changes: 12 additions & 12 deletions renku/cli/move.py
Expand Up @@ -97,7 +97,7 @@ def fmt_dst(path):

# 3. Manage .gitattributes for external storage.
tracked = tuple()
if client.has_external_storage:
if client.check_external_storage():
tracked = tuple(
path for path, attr in client.find_attr(*files).items()
if attr.get('filter') == 'lfs'
Expand All @@ -111,18 +111,18 @@ def fmt_dst(path):
):
click.edit(filename=str(client.path / '.gitattributes'))

if tracked and client.has_external_storage:
lfs_paths = client.track_paths_in_storage(
*(destinations[path] for path in tracked)
)
show_message = client.get_value('renku', 'show_lfs_message')
if (lfs_paths and (show_message is None or show_message == 'True')):
click.echo(
INFO + 'Adding these files to Git LFS:\n' +
'\t{}'.format('\n\t'.join(lfs_paths)) +
'\nTo disable this message in the future, run:' +
'\n\trenku config show_lfs_message False'
if tracked:
lfs_paths = client.track_paths_in_storage(
*(destinations[path] for path in tracked)
)
show_message = client.get_value('renku', 'show_lfs_message')
if lfs_paths and (show_message is None or show_message == 'True'):
click.echo(
INFO + 'Adding these files to Git LFS:\n' +
'\t{}'.format('\n\t'.join(lfs_paths)) +
'\nTo disable this message in the future, run:' +
'\n\trenku config show_lfs_message False'
)

# 4. Handle symlinks.
dst.parent.mkdir(parents=True, exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion renku/cli/remove.py
Expand Up @@ -82,7 +82,7 @@ def fmt_path(path):
dataset.to_yaml()

# 2. Manage .gitattributes for external storage.
if client.has_external_storage:
if client.check_external_storage():
tracked = tuple(
path for path, attr in client.find_attr(*files).items()
if attr.get('filter') == 'lfs'
Expand Down
2 changes: 1 addition & 1 deletion renku/cli/rerun.py
Expand Up @@ -158,7 +158,7 @@ def rerun(client, revision, roots, siblings, inputs, paths):
)

# Don't compute paths if storage is disabled.
if client.has_external_storage:
if client.check_external_storage():
# Make sure all inputs are pulled from a storage.
paths_ = (
path
Expand Down
2 changes: 1 addition & 1 deletion renku/cli/run.py
Expand Up @@ -280,7 +280,7 @@ def run(
with client.with_workflow_storage() as wf:
with factory.watch(client, no_output=no_output) as tool:
# Don't compute paths if storage is disabled.
if client.has_external_storage:
if client.check_external_storage():
# Make sure all inputs are pulled from a storage.
paths_ = (
path for _, path in
Expand Down
2 changes: 1 addition & 1 deletion renku/cli/update.py
Expand Up @@ -173,7 +173,7 @@ def update(client, revision, no_output, siblings, paths):
)

# Don't compute paths if storage is disabled.
if client.has_external_storage:
if client.check_external_storage():
# Make sure all inputs are pulled from a storage.
paths_ = (
path
Expand Down
2 changes: 1 addition & 1 deletion renku/core/commands/checks/storage.py
Expand Up @@ -22,7 +22,7 @@

def check_lfs_info(client):
"""Checks if files in history should be in LFS."""
if not client.has_external_storage:
if not client.check_external_storage():
return True, None

files = client.check_lfs_migrate_info()
Expand Down
2 changes: 1 addition & 1 deletion renku/core/commands/client.py
Expand Up @@ -70,7 +70,7 @@ def new_func(*args, **kwargs):
client = LocalClient(
path=default_path(),
renku_home=RENKU_HOME,
use_external_storage=True,
external_storage_requested=True,
)
ctx = click.Context(click.Command(method))
else:
Expand Down
2 changes: 1 addition & 1 deletion renku/core/commands/clone.py
Expand Up @@ -37,7 +37,7 @@ def project_clone(
checkout_rev=None,
):
"""Clone Renku project repo, install Git hooks and LFS."""
install_lfs = client.use_external_storage
install_lfs = client.external_storage_requested

return clone(
url=url,
Expand Down
4 changes: 2 additions & 2 deletions renku/core/commands/options.py
Expand Up @@ -183,8 +183,8 @@ def option_siblings(func):
return option_check_siblings(option_with_siblings(func))


option_use_external_storage = click.option(
'use_external_storage',
option_external_storage_requested = click.option(
'external_storage_requested',
'--external-storage/--no-external-storage',
' /-S',
is_flag=True,
Expand Down
8 changes: 5 additions & 3 deletions renku/core/management/datasets.py
Expand Up @@ -261,6 +261,8 @@ def add_data_to_dataset(
f'Destination is not a directory: "{destination}"'
)

self.check_external_storage()

files = []
if all_at_once: # Importing a dataset
files = self._add_from_urls(
Expand Down Expand Up @@ -393,7 +395,7 @@ def add_data_to_dataset(
raise errors.OperationError(f'Invalid action {action}')

# Track non-symlinks in LFS
if self.has_external_storage:
if self.check_external_storage():
lfs_paths = self.track_paths_in_storage(*files_to_commit)
show_message = self.get_value('renku', 'show_lfs_message')
if (
Expand All @@ -415,7 +417,7 @@ def add_data_to_dataset(
msg = 'renku dataset: committing {} newly added files'.format(
len(files_to_commit)
)
skip_hooks = not self.use_external_storage
skip_hooks = not self.external_storage_requested
self.repo.index.commit(msg, skip_hooks=skip_hooks)
else:
warning_messages.append('No file was added to project')
Expand Down Expand Up @@ -969,7 +971,7 @@ def update_dataset_files(self, files, ref, delete=False):
}
# Force-add to include possible ignored files that are in datasets
self.repo.git.add(*(file_paths), force=True)
skip_hooks = not self.use_external_storage
skip_hooks = not self.external_storage_requested
self.repo.index.commit(
'renku dataset: updated {} files and deleted {} files'.format(
len(updated_files), len(deleted_files), skip_hooks=skip_hooks
Expand Down
52 changes: 28 additions & 24 deletions renku/core/management/storage.py
Expand Up @@ -41,16 +41,16 @@
ARGUMENT_BATCH_SIZE = 100


def ensure_external_storage(fn):
"""Ensure management of external storage on methods which depend on it.
def check_external_storage_wrapper(fn):
"""Check availability of external storage on methods that need it.
:raises: ``errors.ExternalStorageNotInstalled``
:raises: ``errors.ExternalStorageDisabled``
"""
# noqa
@functools.wraps(fn)
def wrapper(self, *args, **kwargs):
if not self.has_external_storage:
if not self.check_external_storage():
pass
else:
return fn(self, *args, **kwargs)
Expand All @@ -62,8 +62,8 @@ def wrapper(self, *args, **kwargs):
class StorageApiMixin(RepositoryApiMixin):
"""Client for handling a data storage."""

use_external_storage = attr.ib(default=True)
"""Use external storage (e.g. LFS)."""
external_storage_requested = attr.ib(default=True)
"""External storage (e.g. LFS) requested for Renku command."""

RENKU_LFS_IGNORE_PATH = '.renkulfsignore'
""".gitignore like file specifying paths that are not tracked in LFS."""
Expand Down Expand Up @@ -96,23 +96,27 @@ def storage_installed(self):
return bool(which('git-lfs'))

@cached_property
def has_external_storage(self):
def storage_installed_locally(self):
"""Verify that git-lfs is installed for the project."""
repo_config = self.repo.config_reader(config_level='repository')
return repo_config.has_section('filter "lfs"')

def check_external_storage(self):
"""Check if repository has external storage enabled.
:raises: ``errors.ExternalStorageNotInstalled``
:raises: ``errors.ExternalStorageDisabled``
"""
repo_config = self.repo.config_reader(config_level='repository')
lfs_enabled = repo_config.has_section('filter "lfs"')

storage_enabled = lfs_enabled and self.storage_installed
if self.use_external_storage and not storage_enabled:
storage_installed = (
self.storage_installed_locally and self.storage_installed
)
if self.external_storage_requested and not storage_installed:
raise errors.ExternalStorageDisabled(self.repo)

if lfs_enabled and not self.storage_installed:
if self.storage_installed_locally and not self.storage_installed:
raise errors.ExternalStorageNotInstalled(self.repo)

return lfs_enabled and self.storage_installed
return storage_installed

@cached_property
def renku_lfs_ignore(self):
Expand Down Expand Up @@ -149,18 +153,18 @@ def init_repository(self, force=False):
result = super().init_repository(force=force)

# initialize LFS if it is requested and installed
if self.use_external_storage and self.storage_installed:
if self.external_storage_requested and self.storage_installed:
self.init_external_storage(force=force)

return result

@ensure_external_storage
@check_external_storage_wrapper
def track_paths_in_storage(self, *paths):
"""Track paths in the external storage."""
# Calculate which paths can be tracked in lfs
if not self.use_external_storage:
if not self.external_storage_requested:
return

# Calculate which paths can be tracked in lfs
track_paths = []
attrs = self.find_attr(*paths)

Expand Down Expand Up @@ -205,7 +209,7 @@ def track_paths_in_storage(self, *paths):
return track_paths
return []

@ensure_external_storage
@check_external_storage_wrapper
def untrack_paths_from_storage(self, *paths):
"""Untrack paths from the external storage."""
try:
Expand All @@ -220,7 +224,7 @@ def untrack_paths_from_storage(self, *paths):
'Couldn\'t run \'git lfs\':\n{0}'.format(e)
)

@ensure_external_storage
@check_external_storage_wrapper
def list_tracked_paths(self, client=None):
"""List paths tracked in lfs for a client."""
client = client or self
Expand All @@ -235,7 +239,7 @@ def list_tracked_paths(self, client=None):
files = [client.path / f for f in files.splitlines()]
return files

@ensure_external_storage
@check_external_storage_wrapper
def list_unpushed_lfs_paths(self, client=None):
"""List paths tracked in lfs for a client."""
client = client or self
Expand Down Expand Up @@ -268,7 +272,7 @@ def list_unpushed_lfs_paths(self, client=None):
]
return files

@ensure_external_storage
@check_external_storage_wrapper
def pull_paths_from_storage(self, *paths):
"""Pull paths from LFS."""
import math
Expand Down Expand Up @@ -303,7 +307,7 @@ def pull_paths_from_storage(self, *paths):
stderr=STDOUT,
)

@ensure_external_storage
@check_external_storage_wrapper
def clean_storage_cache(self, *paths):
"""Remove paths from lfs cache."""
client_dict = defaultdict(list)
Expand Down Expand Up @@ -385,7 +389,7 @@ def clean_storage_cache(self, *paths):

return untracked_paths, local_only_paths

@ensure_external_storage
@check_external_storage_wrapper
def checkout_paths_from_storage(self, *paths):
"""Checkout a paths from LFS."""
run(
Expand All @@ -398,7 +402,7 @@ def checkout_paths_from_storage(self, *paths):

def check_requires_tracking(self, *paths):
"""Check paths and return a list of those that must be tracked."""
if not self.use_external_storage:
if not self.external_storage_requested:
return

attrs = self.find_attr(*paths)
Expand Down
4 changes: 3 additions & 1 deletion renku/core/models/cwl/command_line_tool.py
Expand Up @@ -271,6 +271,8 @@ def generate_tool(self):
@contextmanager
def watch(self, client, no_output=False):
"""Watch a Renku repository for changes to detect outputs."""
client.check_external_storage()

tool = self.generate_tool()
repo = client.repo

Expand Down Expand Up @@ -359,7 +361,7 @@ def watch(self, client, no_output=False):
if not no_output and not paths:
raise errors.OutputsNotFound(repo, inputs.values())

if client.has_external_storage:
if client.check_external_storage():
lfs_paths = client.track_paths_in_storage(*paths)

show_message = client.get_value('renku', 'show_lfs_message')
Expand Down

0 comments on commit e6ea6da

Please sign in to comment.