Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(workflow): crash with external files in a command #2817

Merged
merged 3 commits into from
Apr 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 4 additions & 5 deletions renku/core/dataset/dataset_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,9 @@ def _create_destination_directory(
# what will be its name
dataset_datadir.mkdir(parents=True, exist_ok=True)

destination = destination or Path(".")
destination = cast(Path, get_relative_path(destination, base=dataset_datadir, strict=True))
destination = dataset_datadir / destination
return cast(Path, destination)
destination = destination or ""
relative_path = cast(str, get_relative_path(destination, base=dataset_datadir, strict=True))
return dataset_datadir / relative_path


def _check_ignored_files(client: "LocalClient", files_to_commit: Set[str], files: List[Dict]):
Expand Down Expand Up @@ -365,7 +364,7 @@ def check_sources_are_within_remote_repo():
if not sources:
return
for source in sources:
if get_relative_path(path=source, base=repository.path) is None:
if not is_subpath(path=source, base=repository.path):
raise errors.ParameterError(f"Path '{source}' is not within the repository")

def get_paths_from_remote_repo() -> Set[Path]:
Expand Down
6 changes: 3 additions & 3 deletions renku/core/util/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ def get_safe_relative_path(path: Union[Path, str], base: Union[Path, str]) -> Pa
raise ValueError(f"Path '{path}' is not with base directory '{base}'")


def get_relative_path(path: Union[Path, str], base: Union[Path, str], strict: bool = False) -> Optional[Path]:
def get_relative_path(path: Union[Path, str], base: Union[Path, str], strict: bool = False) -> Optional[str]:
"""Return a relative path to the base if path is within base without resolving symlinks."""
try:
absolute_path = get_absolute_path(path=path, base=base)
return Path(absolute_path).relative_to(base)
return str(Path(absolute_path).relative_to(base))
except ValueError:
if strict:
raise errors.ParameterError(f"File {path} is not within path {base}")
Expand All @@ -83,7 +83,7 @@ def get_relative_paths(base: Union[Path, str], paths: Sequence[Union[Path, str]]
if relative_path is None:
raise errors.ParameterError(f"Path '{path}' is not within base path '{base}'")

relative_paths.append(str(relative_path))
relative_paths.append(relative_path)

return relative_paths

Expand Down
65 changes: 36 additions & 29 deletions renku/core/workflow/plan_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from contextlib import contextmanager
from itertools import chain
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast

import click
import yaml
Expand All @@ -36,7 +36,7 @@
from renku.core.management import RENKU_HOME
from renku.core.util.git import is_path_safe
from renku.core.util.metadata import is_external_file
from renku.core.util.os import get_relative_path
from renku.core.util.os import get_absolute_path, get_relative_path, is_subpath
from renku.core.workflow.types import PATH_OBJECTS, Directory, File
from renku.domain_model.datastructures import DirectoryTree
from renku.domain_model.workflow.parameter import (
Expand All @@ -63,8 +63,8 @@ class PlanFactory:
def __init__(
self,
command_line: str,
explicit_inputs: Optional[List[Tuple[str, Optional[str]]]] = None,
explicit_outputs: Optional[List[Tuple[str, Optional[str]]]] = None,
explicit_inputs: Optional[List[Tuple[str, str]]] = None,
explicit_outputs: Optional[List[Tuple[str, str]]] = None,
explicit_parameters: Optional[List[Tuple[str, Optional[str]]]] = None,
directory: Optional[str] = None,
working_dir: Optional[str] = None,
Expand Down Expand Up @@ -102,11 +102,11 @@ def __init__(

self.success_codes = success_codes or []

self.explicit_inputs = (
[(Path(os.path.abspath(path)), name) for path, name in explicit_inputs] if explicit_inputs else []
self.explicit_inputs: List[Tuple[str, str]] = (
[(get_absolute_path(path), name) for path, name in explicit_inputs] if explicit_inputs else []
)
self.explicit_outputs = (
[(Path(os.path.abspath(path)), name) for path, name in explicit_outputs] if explicit_outputs else []
self.explicit_outputs: List[Tuple[str, str]] = (
[(get_absolute_path(path), name) for path, name in explicit_outputs] if explicit_outputs else []
)
self.explicit_parameters = explicit_parameters if explicit_parameters else []

Expand Down Expand Up @@ -146,19 +146,16 @@ def _is_ignored_path(candidate: Union[Path, str], ignored_list: Set[str] = None)
"""Return True if the path is in ignored list."""
return ignored_list is not None and str(candidate) in ignored_list

def _resolve_existing_subpath(self, candidate) -> Optional[Path]:
def _resolve_existing_subpath(self, candidate: Union[Path, str]) -> Optional[Path]:
"""Return a path instance if it exists in the project's directory."""
candidate = Path(candidate)

if not candidate.is_absolute():
candidate = self.directory / candidate
candidate = self.directory / candidate if not os.path.isabs(candidate) else Path(candidate)

if candidate.exists() or candidate.is_symlink():
path = candidate.resolve()

# NOTE: If relative_path is None then it's is either an external file or an absolute path (e.g. /bin/bash)
relative_path = get_relative_path(path=path, base=self.working_dir)
if relative_path is not None:
# NOTE: If resolved path is not within the project then it's is either an external file or an absolute path
# (e.g. /bin/bash)
if is_subpath(path, base=self.working_dir):
return path
elif is_external_file(path=candidate, client_path=self.working_dir):
return Path(os.path.abspath(candidate))
Expand Down Expand Up @@ -324,8 +321,7 @@ def _check_potential_output_directory(
) -> Set[Tuple[str, Optional[str]]]:
"""Check an input/parameter for being a potential output directory."""
subpaths = {str(input_path / path) for path in tree.get(input_path, default=[])}
absolute_path = os.path.abspath(input_path)
if all(Path(absolute_path) != path for path, _ in self.explicit_outputs):
if not self._is_explicit(input_path, self.explicit_outputs):
content = {str(path) for path in input_path.rglob("*") if not path.is_dir() and path.name != ".gitkeep"}
preexisting_paths = content - subpaths
if preexisting_paths:
Expand Down Expand Up @@ -371,6 +367,7 @@ def get_stream_mapping_for_value(self, value: Any):
"""Return a stream mapping if value is a path mapped to a stream."""
if self.stdin and self.stdin == value:
return MappedIOStream(id=MappedIOStream.generate_id("stdin"), stream_type="stdin")

if self.stdout and self.stdout == value:
return MappedIOStream(id=MappedIOStream.generate_id("stdout"), stream_type="stdout")
if self.stderr and self.stderr == value:
Expand All @@ -386,7 +383,7 @@ def add_command_input(
encoding_format: Optional[List[str]] = None,
):
"""Create a CommandInput."""
if self.no_input_detection and all(Path(default_value).resolve() != path for path, _ in self.explicit_inputs):
if self.no_input_detection and not self._is_explicit(default_value, self.explicit_inputs):
return

mapped_stream = self.get_stream_mapping_for_value(default_value)
Expand Down Expand Up @@ -420,7 +417,7 @@ def add_command_output(
mapped_to: Optional[MappedIOStream] = None,
):
"""Create a CommandOutput."""
if self.no_output_detection and all(Path(default_value).resolve() != path for path, _ in self.explicit_outputs):
if self.no_output_detection and not self._is_explicit(default_value, self.explicit_outputs):
return

create_folder = False
Expand Down Expand Up @@ -478,7 +475,7 @@ def add_command_output_from_parameter(self, parameter: CommandParameter, name):
"""Create a CommandOutput from a parameter."""
self.parameters.remove(parameter)
value = Path(self._path_relative_to_root(parameter.default_value))
encoding_format = [DIRECTORY_MIME_TYPE] if value.resolve().is_dir() else self._get_mimetype(value)
encoding_format = [DIRECTORY_MIME_TYPE] if value.is_dir() else self._get_mimetype(value)
self.add_command_output(
default_value=str(value),
prefix=parameter.prefix,
Expand Down Expand Up @@ -512,8 +509,8 @@ def add_explicit_inputs(self):

for explicit_input, name in self.explicit_inputs:
try:
relative_explicit_input = str(explicit_input.relative_to(self.working_dir))
except ValueError:
relative_explicit_input = get_relative_path(explicit_input, base=self.working_dir, strict=True)
except errors.ParameterError:
raise errors.UsageError(
"The input file or directory is not in the repository."
"\n\n\t" + click.style(str(explicit_input), fg="yellow") + "\n\n"
Expand Down Expand Up @@ -611,11 +608,15 @@ def watch(self, client_dispatcher: IClientDispatcher, no_output=False):
candidates |= {(o.b_path, None) for o in repository.unstaged_changes if not o.deleted}

# Filter out explicit outputs
explicit_output_paths = {str(path.relative_to(self.working_dir)) for path, _ in self.explicit_outputs}
explicit_output_paths = {
str(Path(path).relative_to(self.working_dir)) for path, _ in self.explicit_outputs
}
candidates = {c for c in candidates if c[0] not in explicit_output_paths}

# Include explicit outputs
candidates |= {(str(path.relative_to(self.working_dir)), name) for path, name in self.explicit_outputs}
candidates |= {
(str(Path(path).relative_to(self.working_dir)), name) for path, name in self.explicit_outputs
}

candidates = {(path, name) for path, name in candidates if is_path_safe(path)}

Expand All @@ -626,7 +627,7 @@ def watch(self, client_dispatcher: IClientDispatcher, no_output=False):
if (
stream
and all(stream != path for path, _ in candidates)
and (Path(os.path.abspath(stream)) != path for path, _ in self.explicit_outputs)
and not self._is_explicit(stream, self.explicit_outputs)
):
unmodified.add(stream)
elif stream:
Expand All @@ -652,7 +653,8 @@ def watch(self, client_dispatcher: IClientDispatcher, no_output=False):

def _path_relative_to_root(self, path) -> str:
"""Make a potentially relative path in a subdirectory relative to the root of the repository."""
return str((self.directory / path).resolve().relative_to(self.working_dir))
absolute_path = get_absolute_path(path, base=self.directory)
return cast(str, get_relative_path(absolute_path, base=self.working_dir, strict=True))

def _include_indirect_parameters(self):
run_parameters = read_indirect_parameters(self.working_dir)
Expand All @@ -668,7 +670,7 @@ def add_indirect_inputs(self):

for name, indirect_input in read_files_list(indirect_inputs_list).items():
# treat indirect inputs like explicit inputs
path = Path(os.path.abspath(indirect_input))
path = get_absolute_path(indirect_input)
self.explicit_inputs.append((path, name))

# add new explicit inputs (if any) to inputs
Expand All @@ -680,14 +682,19 @@ def add_indirect_outputs(self):

for name, indirect_output in read_files_list(indirect_outputs_list).items():
# treat indirect outputs like explicit outputs
path = Path(os.path.abspath(indirect_output))
path = get_absolute_path(indirect_output)
self.explicit_outputs.append((path, name))

def iter_input_files(self, basedir):
"""Yield tuples with input id and path."""
for input_ in self.inputs:
yield input_.id, os.path.normpath(os.path.join(basedir, input_.default_value))

@staticmethod
def _is_explicit(path: Union[Path, str], explicits_collection: List[Tuple[str, str]]) -> bool:
absolute_path = get_absolute_path(path)
return any(absolute_path == path for path, _ in explicits_collection)

@inject.autoparams("project_gateway")
def to_plan(
self,
Expand Down
6 changes: 6 additions & 0 deletions renku/data/shacl_shape.json
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@
},
{
"sh:pattern": "mailto:[^@\\s]+@\\S+\\.\\S+"
},
{
"sh:pattern": "http(s)?://[\\S]*orcid.org/[0-9-]+"
}
]
},
Expand Down Expand Up @@ -370,6 +373,9 @@
},
{
"sh:pattern": "mailto:[^@\\s]+@\\S+\\.\\S+"
},
{
"sh:pattern": "http(s)?://[\\S]*orcid.org/[0-9-]+"
}
]
},
Expand Down
16 changes: 14 additions & 2 deletions tests/cli/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ def test_run_metadata(renku_cli, runner, client, client_database_injection_manag
assert 0 == result.exit_code, format_result_exception(result)


def test_run_external_file(renku_cli, runner, client, client_database_injection_manager, tmpdir):
"""Test run with workflow metadata."""
def test_run_with_outside_files(renku_cli, runner, client, client_database_injection_manager, tmpdir):
"""Test run with files that are outside the project."""

external_file = tmpdir.join("file_1")
external_file.write(str(1))
Expand Down Expand Up @@ -265,3 +265,15 @@ def test_run_prints_plan_when_stderr_redirected(split_runner, client):
assert 0 == result.exit_code, format_result_exception(result)
assert "Name: echo-command" in (client.path / "output").read_text()
assert "Name:" not in result.output


def test_run_with_external_files(split_runner, client, directory_tree):
"""Test run commands that use external files."""
assert 0 == split_runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-dataset", directory_tree]).exit_code

path = client.path / "data" / "my-dataset" / "directory_tree" / "file1"

result = split_runner.invoke(cli, ["run", "tail", path], stdout="output")

assert 0 == result.exit_code, format_result_exception(result)
assert "file1" in (client.path / "output").read_text()
18 changes: 18 additions & 0 deletions tests/cli/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,3 +490,21 @@ def test_update_with_execute(runner, client, renku_cli, client_database_injectio

assert "content_aeven more modified\n" == (client.path / output1).read_text()
assert "content_beven more modified\n" == (client.path / output2).read_text()


def test_update_with_external_files(split_runner, client, directory_tree):
"""Test update commands that use external files."""
assert 0 == split_runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-dataset", directory_tree]).exit_code

path = client.path / "data" / "my-dataset" / "directory_tree" / "file1"

assert 0 == split_runner.invoke(cli, ["run", "tail", path], stdout="output").exit_code

(directory_tree / "file1").write_text("updated file1")

assert 0 == split_runner.invoke(cli, ["dataset", "update", "--all"]).exit_code

result = split_runner.invoke(cli, ["update", "--all"])

assert 0 == result.exit_code, format_result_exception(result)
assert "updated file1" in (client.path / "output").read_text()