From 04c1803582184ab9d9367439d39951954f8742b6 Mon Sep 17 00:00:00 2001 From: Mohammad Alisafaee Date: Wed, 7 Oct 2020 17:08:38 +0200 Subject: [PATCH] fix(dataset): explicit failure when cannot pull LFS objects --- renku/core/management/datasets.py | 32 ++++++++++++-------------- tests/cli/test_integration_datasets.py | 13 +++++++++++ 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/renku/core/management/datasets.py b/renku/core/management/datasets.py index e0d31dcb68..d74b8a0fff 100644 --- a/renku/core/management/datasets.py +++ b/renku/core/management/datasets.py @@ -20,6 +20,7 @@ import concurrent.futures import os import re +import shlex import shutil import tempfile import time @@ -677,23 +678,19 @@ def _get_src_and_dst(path, repo_path, sources, dst_root, used_sources): def _fetch_lfs_files(repo_path, paths): """Fetch and checkout paths that are tracked by Git LFS.""" repo_path = str(repo_path) - try: - output = run(("git", "lfs", "ls-files", "--name-only"), stdout=PIPE, cwd=repo_path, universal_newlines=True) - except SubprocessError: - return - - lfs_files = set(output.stdout.split("\n")) - files = lfs_files & paths - if not files: - return try: - for path in files: - run(["git", "lfs", "pull", "--include", path], cwd=repo_path) + includes = ",".join(shlex.quote(p) for p in paths) + status = run( + ["git", "lfs", "pull", "--include", includes], stderr=PIPE, cwd=repo_path, universal_newlines=True + ) + if status.returncode != 0: + message = "\n\t".join(status.stderr.split("\n")) + raise errors.GitError(f"Cannot pull LFS objects from server: {message}") except KeyboardInterrupt: raise - except SubprocessError: - pass + except SubprocessError as e: + raise errors.GitError(f"Cannot pull LFS objects from server: {e}") @staticmethod def _fetch_files_metadata(client, paths): @@ -952,7 +949,8 @@ def _update_pointer_file(self, pointer_file_path): os.remove(pointer_file_path) return self._create_pointer_file(target, checksum=checksum) - def remove_file(self, filepath): + @staticmethod + def remove_file(filepath): """Remove a file/symlink and its pointer file (for external files).""" path = Path(filepath) try: @@ -1000,7 +998,7 @@ def prepare_git_repo(self, url, ref=None): if not url: raise errors.GitError("Invalid URL.") - RENKU_BRANCH = "renku-default-branch" + renku_branch = "renku-default-branch" def checkout(repo, ref): try: @@ -1008,7 +1006,7 @@ def checkout(repo, ref): except GitCommandError: raise errors.ParameterError('Cannot find reference "{}" in Git repository: {}'.format(ref, url)) - ref = ref or RENKU_BRANCH + ref = ref or renku_branch u = GitURL.parse(url) path = u.pathname if u.hostname == "localhost": @@ -1045,7 +1043,7 @@ def checkout(repo, ref): # Because the name of the default branch is not always 'master', we # create an alias of the default branch when cloning the repo. It # is used to refer to the default branch later. - renku_ref = "refs/heads/" + RENKU_BRANCH + renku_ref = "refs/heads/" + renku_branch try: repo.git.execute(["git", "symbolic-ref", renku_ref, repo.head.reference.path]) checkout(repo, ref) diff --git a/tests/cli/test_integration_datasets.py b/tests/cli/test_integration_datasets.py index 9eb583cb94..e35c2577b5 100644 --- a/tests/cli/test_integration_datasets.py +++ b/tests/cli/test_integration_datasets.py @@ -369,6 +369,19 @@ def test_dataset_reimport_renkulab_dataset(runner, project, url): assert "Dataset exists" in result.output +@pytest.mark.integration +@flaky(max_runs=10, min_passes=1) +def test_renku_dataset_import_missing_lfs_objects(runner, project): + """Test importing a dataset with missing LFS objects fails.""" + result = runner.invoke( + cli, ["dataset", "import", "--yes", "https://dev.renku.ch/datasets/5c11e321-2bea-458c-94ce-abccf4257a54"] + ) + + assert 1 == result.exit_code + assert "Error: Cannot pull LFS objects from server" in result.output + assert "[404] Object does not exist on the server or you don't have permissions to access it" in result.output + + @pytest.mark.integration @flaky(max_runs=10, min_passes=1) @pytest.mark.parametrize(