diff --git a/renku/core/dataset/providers/local.py b/renku/core/dataset/providers/local.py index 19598cd7c5..87607a7f8d 100644 --- a/renku/core/dataset/providers/local.py +++ b/renku/core/dataset/providers/local.py @@ -130,6 +130,7 @@ def add( elif copy: prompt_action = False + ends_with_slash = False u = urllib.parse.urlparse(uri) path = u.path @@ -137,6 +138,9 @@ def add( source_root = Path(get_absolute_path(path)) warnings: List[str] = [] + if source_root.is_dir() and uri.endswith("/"): + ends_with_slash = True + def check_recursive_addition(src: Path): if is_subpath(destination, src): raise errors.ParameterError(f"Cannot recursively add path containing dataset's data directory: {path}") @@ -155,7 +159,12 @@ def get_destination_root(): if source_root.is_dir() and destination_exists and not destination_is_dir: raise errors.ParameterError(f"Cannot copy directory '{path}' to non-directory '{destination}'") - return destination / source_root.name if destination_exists and destination_is_dir else destination + if destination_exists and destination_is_dir: + if ends_with_slash: + return destination + + return destination / source_root.name + return destination def get_metadata(src: Path) -> DatasetAddMetadata: is_tracked = repository.contains(src) diff --git a/renku/ui/cli/dataset.py b/renku/ui/cli/dataset.py index bf397e8063..d09f98ba9a 100644 --- a/renku/ui/cli/dataset.py +++ b/renku/ui/cli/dataset.py @@ -137,6 +137,14 @@ This will copy the contents of ``data-url`` to the dataset and add it to the dataset metadata. +.. note:: + + If the URL refers to a local directory, data is added differently depending + on whether there is a trailing slash (``/``) or not. If the URL ends in a slash, + files inside the directory are added to the target directory. If it does + not end in a slash, then the directory itself will be added inside the + target directory. 
+ You can create a dataset when you add data to it for the first time by passing ``--create`` flag to add command: diff --git a/tests/core/commands/test_dataset.py b/tests/core/commands/test_dataset.py index 7aff8dbc00..60807e6ccd 100644 --- a/tests/core/commands/test_dataset.py +++ b/tests/core/commands/test_dataset.py @@ -42,7 +42,7 @@ from renku.core.util.contexts import chdir from renku.core.util.git import get_git_user from renku.core.util.urls import get_slug -from renku.domain_model.dataset import Dataset, is_dataset_name_valid +from renku.domain_model.dataset import Dataset, Url, is_dataset_name_valid from renku.domain_model.project_context import project_context from renku.domain_model.provenance.agent import Person from renku.infrastructure.gateway.dataset_gateway import DatasetGateway @@ -88,6 +88,22 @@ def test_data_add(scheme, path, overwrite, error, project_with_injection, direct assert os.path.exists(target_path) +@pytest.mark.parametrize( + "slash, target", + [ + (False, "data/dataset/dir1/file2"), + (True, "data/dataset/file2"), + ], +) +def test_data_add_trailing_slash(slash, target, directory_tree, project_with_injection): + """Test that a trailing slash controls whether a directory or its contents are added.""" + + dataset = add_to_dataset("dataset", [str(directory_tree / "dir1") + ("/" if slash else "")], create=True) + + file = next(f for f in dataset.files if f.entity.path.endswith("file2")) + assert file.entity.path == target + + def test_data_add_recursive(directory_tree, project_with_injection): """Test recursive data imports.""" dataset = add_to_dataset("dataset", [str(directory_tree / "dir1")], create=True) @@ -178,7 +194,7 @@ def test_mutate(project): name="my-dataset", creators=[Person.from_string("John Doe ")], date_published=datetime.datetime.now(datetime.timezone.utc), - same_as="http://some-url", + same_as=Url(url_str="http://some-url"), ) old_dataset = copy.deepcopy(dataset) @@ -197,7 +213,7 @@ def test_mutator_is_added_once(project): name="my-dataset", creators=[mutator], 
date_published=datetime.datetime.now(datetime.timezone.utc), - same_as="http://some-url", + same_as=Url(url_str="http://some-url"), ) old_dataset = copy.deepcopy(dataset)