Skip to content

Commit

Permalink
feat(dataset): default dataset add action in configuration (#3398)
Browse files Browse the repository at this point in the history
  • Loading branch information
m-alisafaee committed Apr 26, 2023
1 parent aafb6ae commit 4f10d5f
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 5 deletions.
2 changes: 1 addition & 1 deletion renku/core/dataset/dataset_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def copy_file(file: DatasetAddMetadata, dataset: Dataset, storage: Optional[ISto
if not file.has_action:
return []

# NOTE: If file is in a sub-directory of a dataset's remote storage URI, only update the metadata
# NOTE: If file is in a subdirectory of a dataset's remote storage URI, only update the metadata
if file.from_cloud_storage:
if dataset.storage and is_uri_subfolder(resolve_uri(dataset.storage), file.url):
file.action = DatasetAddAction.METADATA_ONLY
Expand Down
36 changes: 32 additions & 4 deletions renku/core/dataset/providers/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from typing import TYPE_CHECKING, List, Optional

from renku.core import errors
from renku.core.config import get_value
from renku.core.dataset.providers.api import (
AddProviderInterface,
ExporterApi,
Expand Down Expand Up @@ -114,14 +115,32 @@ def get_metadata(
if flags > 1:
raise errors.ParameterError("--move, --copy and --link are mutually exclusive.")

prompt_action = True if flags == 0 else False
prompt_action = False

if move:
default_action = DatasetAddAction.MOVE
elif link:
default_action = DatasetAddAction.SYMLINK
else:
elif copy:
default_action = DatasetAddAction.COPY
else:
prompt_action = True
action = get_value("renku", "default_dataset_add_action")
if action:
prompt_action = False
if action.lower() == "copy":
default_action = DatasetAddAction.COPY
elif action.lower() == "move":
default_action = DatasetAddAction.MOVE
elif action.lower() == "link":
default_action = DatasetAddAction.SYMLINK
else:
raise errors.ParameterError(
f"Invalid default action for adding to datasets in Renku config: '{action}'. "
"Valid values are 'copy', 'link', and 'move'."
)
else:
default_action = DatasetAddAction.COPY

ends_with_slash = False
u = urllib.parse.urlparse(uri)
Expand Down Expand Up @@ -180,6 +199,14 @@ def get_file_metadata(src: Path) -> DatasetAddMetadata:

destination_root = get_destination_root()

if not is_subpath(source_root, project_context.path):
if link:
raise errors.ParameterError(f"Cannot use '--link' for files outside of project: '{uri}'")
if default_action == DatasetAddAction.SYMLINK:
# NOTE: A default action of 'link' cannot be used for external files
action = DatasetAddAction.COPY
prompt_action = True

results = []
if source_root.is_dir():
for file in source_root.rglob("*"):
Expand All @@ -195,8 +222,9 @@ def get_file_metadata(src: Path) -> DatasetAddMetadata:

if not force and prompt_action:
communication.confirm(
f"The following files will be copied to {destination.relative_to(project_context.path)} "
"(use '--move' or '--link' to move or symlink them instead, '--copy' to not show this warning):\n\t"
f"The following files will be copied to {destination.relative_to(project_context.path)}:\n\t"
"(use '--move' or '--link' to move or symlink them instead, '--copy' to not show this warning).\n\t"
"(run 'renku config set renku.default_dataset_add_action copy' to make copy the default action).\n\t"
+ "\n\t".join(str(e.source) for e in results)
+ "\nProceed?",
abort=True,
Expand Down
4 changes: 4 additions & 0 deletions renku/ui/cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@
| ``dataverse.server_url`` | URL for the Dataverse API server | ``None`` |
| | to use | |
+--------------------------------+-------------------------------------+-----------+
| ``default_dataset_add_action`` | Default action when adding files to | ``None`` |
| | datasets. Can be ``copy``, ``link`` | |
| | or ``move``. | |
+--------------------------------+-------------------------------------+-----------+
| ``lfs_threshold`` | Threshold file size below which | ``100kb`` |
| | files are not added to git LFS | |
+--------------------------------+-------------------------------------+-----------+
Expand Down
65 changes: 65 additions & 0 deletions tests/cli/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,71 @@ def test_add_local_actions(runner, project, action, existing_paths, missing_path
assert path.is_symlink()


@pytest.mark.parametrize("action, source_exists_after", [("--copy", True), ("--move", False)])
def test_add_non_local_actions(runner, project, directory_tree, action, source_exists_after):
    """Check that ``--copy`` keeps and ``--move`` removes a source file that is outside the project."""
    source = directory_tree / "file1"

    outcome = runner.invoke(cli, ["dataset", "add", action, "--create", "local", source])
    # NOTE: Location inside the dataset's data directory where the added file is expected to land
    target = project.path / "data" / "local" / "file1"

    assert outcome.exit_code == 0, format_result_exception(outcome)
    assert source.exists() == source_exists_after
    assert target.exists()


def test_add_non_local_link_action(runner, project, directory_tree):
    """Check that ``--link`` is rejected for a file that is outside the project."""
    external_file = directory_tree / "file1"
    cli_args = ["dataset", "add", "--link", "--create", "local", external_file]

    outcome = runner.invoke(cli, cli_args)

    assert outcome.exit_code == 2, format_result_exception(outcome)
    assert "Cannot use '--link' for files outside of project:" in outcome.output


@pytest.mark.parametrize("action, source_exists_after", [("copy", True), ("move", False)])
@pytest.mark.serial
def test_add_default_configured_actions(runner, project, directory_tree, action, source_exists_after):
    """Check that a ``copy``/``move`` default set in the Renku configuration is applied without a prompt."""
    source = directory_tree / "file1"
    # NOTE: No action flag is passed on the command line, so the configured default is what gets used
    set_value("renku", "default_dataset_add_action", action, global_only=True)

    outcome = runner.invoke(cli, ["dataset", "add", "--create", "local", source])
    target = project.path / "data" / "local" / "file1"

    assert outcome.exit_code == 0, format_result_exception(outcome)
    assert "The following files will be copied to" not in outcome.output
    assert source.exists() is source_exists_after
    assert target.exists()


@pytest.mark.serial
def test_add_default_configured_link(runner, project, directory_tree):
    """Check that a configured ``link`` default prompts the user and copies a file outside the project."""
    source = directory_tree / "file1"
    set_value("renku", "default_dataset_add_action", "link", global_only=True)

    # NOTE: Answer the confirmation prompt with "y" so that the command proceeds
    outcome = runner.invoke(cli, ["dataset", "add", "--create", "local", source], input="y\n")
    target = project.path / "data" / "local" / "file1"

    assert outcome.exit_code == 0, format_result_exception(outcome)
    assert "The following files will be copied to" in outcome.output
    assert source.exists()
    assert target.exists()
    assert not target.is_symlink()


@pytest.mark.serial
def test_add_default_configured_invalid_action(runner, project, directory_tree):
    """Check that an invalid default action in the Renku configuration fails with an explanatory message."""
    source = directory_tree / "file1"
    set_value("renku", "default_dataset_add_action", "invalid", global_only=True)

    outcome = runner.invoke(cli, ["dataset", "add", "--create", "local", source])
    message = outcome.output

    assert outcome.exit_code == 2, format_result_exception(outcome)
    assert "Invalid default action for adding to datasets in Renku config: 'invalid'." in message
    assert "Valid values are 'copy', 'link', and 'move'." in message


def test_add_an_empty_directory(runner, project, directory_tree):
"""Test adding an empty directory to a dataset."""
path = directory_tree / "empty-directory"
Expand Down

0 comments on commit 4f10d5f

Please sign in to comment.