Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(dataset): explicit failure when cannot pull LFS objects #1590

Merged
merged 3 commits into from
Oct 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
32 changes: 15 additions & 17 deletions renku/core/management/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import concurrent.futures
import os
import re
import shlex
import shutil
import tempfile
import time
Expand Down Expand Up @@ -677,23 +678,19 @@ def _get_src_and_dst(path, repo_path, sources, dst_root, used_sources):
def _fetch_lfs_files(repo_path, paths):
"""Fetch and checkout paths that are tracked by Git LFS."""
repo_path = str(repo_path)
try:
output = run(("git", "lfs", "ls-files", "--name-only"), stdout=PIPE, cwd=repo_path, universal_newlines=True)
except SubprocessError:
return

lfs_files = set(output.stdout.split("\n"))
files = lfs_files & paths
if not files:
return

try:
for path in files:
run(["git", "lfs", "pull", "--include", path], cwd=repo_path)
includes = ",".join(shlex.quote(p) for p in paths)
status = run(
["git", "lfs", "pull", "--include", includes], stderr=PIPE, cwd=repo_path, universal_newlines=True
)
if status.returncode != 0:
message = "\n\t".join(status.stderr.split("\n"))
raise errors.GitError(f"Cannot pull LFS objects from server: {message}")
except KeyboardInterrupt:
raise
except SubprocessError:
pass
except SubprocessError as e:
raise errors.GitError(f"Cannot pull LFS objects from server: {e}")

@staticmethod
def _fetch_files_metadata(client, paths):
Expand Down Expand Up @@ -955,7 +952,8 @@ def _update_pointer_file(self, pointer_file_path):
os.remove(pointer_file_path)
return self._create_pointer_file(target, checksum=checksum)

def remove_file(self, filepath):
@staticmethod
def remove_file(filepath):
"""Remove a file/symlink and its pointer file (for external files)."""
path = Path(filepath)
try:
Expand Down Expand Up @@ -1003,15 +1001,15 @@ def prepare_git_repo(self, url, ref=None):
if not url:
raise errors.GitError("Invalid URL.")

RENKU_BRANCH = "renku-default-branch"
renku_branch = "renku-default-branch"

def checkout(repo, ref):
try:
repo.git.checkout(ref)
except GitCommandError:
raise errors.ParameterError('Cannot find reference "{}" in Git repository: {}'.format(ref, url))

ref = ref or RENKU_BRANCH
ref = ref or renku_branch
u = GitURL.parse(url)
path = u.pathname
if u.hostname == "localhost":
Expand Down Expand Up @@ -1048,7 +1046,7 @@ def checkout(repo, ref):
# Because the name of the default branch is not always 'master', we
# create an alias of the default branch when cloning the repo. It
# is used to refer to the default branch later.
renku_ref = "refs/heads/" + RENKU_BRANCH
renku_ref = "refs/heads/" + renku_branch
try:
repo.git.execute(["git", "symbolic-ref", renku_ref, repo.head.reference.path])
checkout(repo, ref)
Expand Down
13 changes: 13 additions & 0 deletions tests/cli/test_integration_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,19 @@ def test_dataset_reimport_renkulab_dataset(runner, project, url):
assert "Dataset exists" in result.output


@pytest.mark.integration
@flaky(max_runs=10, min_passes=1)
def test_renku_dataset_import_missing_lfs_objects(runner, project):
    """Check that importing a dataset whose LFS objects are missing exits with an error."""
    dataset_url = "https://dev.renku.ch/datasets/5c11e321-2bea-458c-94ce-abccf4257a54"
    command = ["dataset", "import", "--yes", dataset_url]

    result = runner.invoke(cli, command)

    # The import must abort with exit code 1 and surface the LFS server error.
    assert result.exit_code == 1
    output = result.output
    assert "Error: Cannot pull LFS objects from server" in output
    assert "[404] Object does not exist on the server or you don't have permissions to access it" in output


@pytest.mark.integration
@flaky(max_runs=10, min_passes=1)
@pytest.mark.parametrize(
Expand Down