diff --git a/docs/cheatsheet/conf.py b/docs/cheatsheet/conf.py index bf0e09a146..fcaacabe0b 100644 --- a/docs/cheatsheet/conf.py +++ b/docs/cheatsheet/conf.py @@ -190,16 +190,13 @@ ("py:class", "DiGraph"), ("py:class", "DynamicProxy"), ("py:class", "IActivityGateway"), - ("py:class", "IClientDispatcher"), ("py:class", "IDatasetGateway"), ("py:class", "IPlanGateway"), - ("py:class", "LocalClient"), ("py:class", "NoValueType"), ("py:class", "OID_TYPE"), ("py:class", "Path"), ("py:class", "Persistent"), ("py:class", "optional"), - ("py:class", '"LocalClient"'), ("py:class", '"ValueResolver"'), ("py:exc", "errors.ParameterError"), ] diff --git a/docs/conf.py b/docs/conf.py index 33ba3e94e0..bf510cef8c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -355,17 +355,14 @@ ("py:class", "DiGraph"), ("py:class", "DynamicProxy"), ("py:class", "IActivityGateway"), - ("py:class", "IClientDispatcher"), ("py:class", "IDatasetGateway"), ("py:class", "IPlanGateway"), ("py:class", "IStorageFactory"), - ("py:class", "LocalClient"), ("py:class", "NoValueType"), ("py:class", "OID_TYPE"), ("py:class", "Path"), ("py:class", "Persistent"), ("py:class", "optional"), - ("py:class", '"LocalClient"'), ("py:class", '"ValueResolver"'), ("py:exc", "errors.ParameterError"), ] diff --git a/docs/reference/gateways.rst b/docs/reference/gateways.rst index 178d8c33d8..00079e67dc 100644 --- a/docs/reference/gateways.rst +++ b/docs/reference/gateways.rst @@ -31,10 +31,6 @@ Interfaces that the Gateways implement. :members: :show-inheritance: -.. automodule:: renku.core.interface.client_dispatcher - :members: - :show-inheritance: - .. automodule:: renku.core.interface.database_gateway :members: :show-inheritance: diff --git a/renku/command/checks/activities.py b/renku/command/checks/activities.py index 1a278afc58..975186e216 100644 --- a/renku/command/checks/activities.py +++ b/renku/command/checks/activities.py @@ -22,14 +22,14 @@ import click from renku.command.command_builder import inject -from renku.command.echo import WARNING +from renku.command.util import WARNING from renku.core.interface.activity_gateway import IActivityGateway from renku.core.util import communication from renku.domain_model.project_context import project_context @inject.autoparams("activity_gateway") -def check_migrated_activity_ids(client, fix, activity_gateway: IActivityGateway, **kwargs): +def check_migrated_activity_ids(fix, activity_gateway: IActivityGateway, **_): """Check that activity ids were correctly migrated in the past.""" activities = activity_gateway.get_all_activities(include_deleted=True) diff --git a/renku/command/checks/datasets.py b/renku/command/checks/datasets.py index 90417a7a2c..49c2bba51a 100644 --- a/renku/command/checks/datasets.py +++ b/renku/command/checks/datasets.py @@ -23,7 +23,7 @@ import click from renku.command.command_builder import inject -from renku.command.echo import WARNING +from renku.command.util import WARNING from renku.core import errors from renku.core.dataset.dataset_add import add_to_dataset from renku.core.interface.dataset_gateway import IDatasetGateway @@ -33,12 +33,11 @@ from renku.domain_model.project_context import project_context -def check_dataset_old_metadata_location(client, **kwargs): +def check_dataset_old_metadata_location(**_): """Check location of dataset metadata. Args: - client: ``LocalClient``. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether dataset metadata location is valid and string of found problems. 
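Note: the checks in this diff all migrate to the same convention: the `client` parameter is dropped, leftover keyword arguments collapse into `**_`, and state is read from the global `project_context` plus gateways injected via `@inject.autoparams`. A minimal sketch of that convention, assuming the gateway and dataset accessors behave as they do elsewhere in this diff; the check body and its message are illustrative, not part of the patch:

```python
from renku.command.command_builder import inject
from renku.core.interface.dataset_gateway import IDatasetGateway
from renku.domain_model.project_context import project_context


@inject.autoparams("dataset_gateway")
def check_example_datadirs(fix, dataset_gateway: IDatasetGateway, **_):
    """Toy check following the new signature convention."""
    missing = [
        dataset.name
        for dataset in dataset_gateway.get_all_active_datasets()
        if not (project_context.path / dataset.get_datadir()).exists()
    ]
    if not missing:
        return True, None
    return False, "Datasets whose data directory is missing:\n\t" + "\n\t".join(missing)
```

Returning `(True, None)` on success and `(False, problems)` on failure matches the tuple contract documented in the docstrings throughout these files.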
@@ -59,13 +58,12 @@ def check_dataset_old_metadata_location(client, **kwargs): @inject.autoparams("dataset_gateway") -def check_missing_files(client, dataset_gateway: IDatasetGateway, **kwargs): +def check_missing_files(dataset_gateway: IDatasetGateway, **_): """Find missing files listed in datasets. Args: - client: ``LocalClient``. dataset_gateway(IDatasetGateway): the dataset gateway. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether all dataset files are there and string of found problems. @@ -96,14 +94,13 @@ def check_missing_files(client, dataset_gateway: IDatasetGateway, **kwargs): @inject.autoparams("dataset_gateway") -def check_invalid_datasets_derivation(client, fix, dataset_gateway: IDatasetGateway, **kwargs): +def check_invalid_datasets_derivation(fix, dataset_gateway: IDatasetGateway, **_): """Remove ``derived_from`` from import datasets. Args: - client: ``LocalClient``. fix: Whether to fix found issues. dataset_gateway(IDatasetGateway): the dataset gateway. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether dataset derivations are valid and string of found problems. @@ -146,14 +143,13 @@ def fix_or_report(dataset): @inject.autoparams("dataset_gateway") -def check_dataset_files_outside_datadir(client, fix, dataset_gateway: IDatasetGateway, **kwargs): +def check_dataset_files_outside_datadir(fix, dataset_gateway: IDatasetGateway, **_): """Check for dataset files that are not inside a dataset's datadir. Args: - client: ``LocalClient``. fix: Whether to fix found issues. dataset_gateway(IDatasetGateway): the dataset gateway. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether there are no dataset files outside of its datadir and string of found problems. diff --git a/renku/command/checks/external.py b/renku/command/checks/external.py index ded49044d2..4f379a4ff7 100644 --- a/renku/command/checks/external.py +++ b/renku/command/checks/external.py @@ -16,22 +16,20 @@ # See the License for the specific language governing permissions and # limitations under the License. """Checks for external files.""" -import click from renku.command.command_builder import inject -from renku.command.echo import WARNING +from renku.command.util import WARNING, red_text, yellow_text from renku.core.interface.dataset_gateway import IDatasetGateway from renku.domain_model.project_context import project_context @inject.autoparams("dataset_gateway") -def check_missing_external_files(client, dataset_gateway: IDatasetGateway, **kwargs): +def check_missing_external_files(dataset_gateway: IDatasetGateway, **_): """Find external files that are missing. Args: - client: ``LocalClient``. dataset_gateway(IDatasetGateway): The injected dataset gateway. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether no external files are missing and string of found problems. 
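For reference, the string helpers replacing the inline `click.style` calls live in the renamed `renku.command.util` module (see the `echo.py` to `util.py` rename near the end of this diff). A small usage sketch mirroring the hunk that follows; the example paths are made up:

```python
from renku.command.util import WARNING, red_text, yellow_text

# hypothetical (path, target) pairs for broken external links
missing = [("data/input.csv", "/mnt/external/input.csv")]

missing_str = "\n\t".join(f"{yellow_text(path)} -> {red_text(target)}" for path, target in missing)
problems = (
    f"\n{WARNING}There are missing external files.\n (make sure that external paths are accessible)"
    f"\n\n\t{missing_str}\n"
)
print(problems)
```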
@@ -48,13 +46,9 @@ def check_missing_external_files(client, dataset_gateway: IDatasetGateway, **kwa if not missing: return True, None + missing_str = "\n\t".join(f"{yellow_text(path)} -> {red_text(target)}" for path, target in missing) problems = ( - "\n" + WARNING + "There are missing external files.\n" - " (make sure that external paths are accessible)" - + "\n\n\t" - + "\n\t".join( - click.style(path, fg="yellow") + " -> " + click.style(target, fg="red") for path, target in missing - ) - + "\n" + f"\n{WARNING}There are missing external files.\n (make sure that external paths are accessible)" + f"\n\n\t{missing_str}\n" ) return False, problems diff --git a/renku/command/checks/githooks.py b/renku/command/checks/githooks.py index 80df51d7af..370746f7c9 100644 --- a/renku/command/checks/githooks.py +++ b/renku/command/checks/githooks.py @@ -19,7 +19,7 @@ from io import StringIO -from renku.command.echo import WARNING +from renku.command.util import WARNING from renku.core.githooks import HOOKS from renku.core.util.git import get_hook_path from renku.domain_model.project_context import project_context @@ -30,12 +30,11 @@ import importlib.resources as importlib_resources # type: ignore -def check_git_hooks_installed(client, **kwargs): +def check_git_hooks_installed(**_): """Checks if all necessary hooks are installed. Args: - client: ``LocalClient``. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether git hooks are valid and string of found problems. @@ -65,8 +64,8 @@ def check_git_hooks_installed(client, **kwargs): return True, None -def _extract_renku_hook(file_): - lines = [line.strip() for line in file_ if line.strip()] +def _extract_renku_hook(file): + lines = [line.strip() for line in file if line.strip()] start = end = -1 for index, line in enumerate(lines): if line.startswith("# RENKU HOOK."): diff --git a/renku/command/checks/migration.py b/renku/command/checks/migration.py index 63922d86b9..f2f9f5ae6d 100644 --- a/renku/command/checks/migration.py +++ b/renku/command/checks/migration.py @@ -16,16 +16,16 @@ # See the License for the specific language governing permissions and # limitations under the License. """Warn if migration is required.""" -from renku.command.echo import ERROR, WARNING -from renku.core.management.migrate import is_migration_required, is_project_unsupported +from renku.command.util import ERROR, WARNING +from renku.core.migration.migrate import is_migration_required, is_project_unsupported -def check_migration(client, **kwargs): + +def check_migration(**_): """Check for project version. Args: - client: ``LocalClient``. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether project metadata is up to date and string of found problems. 
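The import move from `renku.core.management.migrate` to `renku.core.migration.migrate` recurs throughout this diff. A hedged reconstruction of the check above under the new layout; the function bodies are outside this hunk, so the messages below are illustrative:

```python
from renku.command.util import ERROR, WARNING
from renku.core.migration.migrate import is_migration_required, is_project_unsupported


def check_migration(**_):
    """Check for project version."""
    if is_project_unsupported():
        # illustrative message; the real one is outside this hunk
        return False, ERROR + "project version is not supported by this version of Renku\n"
    if is_migration_required():
        return False, WARNING + "project metadata is outdated, run `renku migrate`\n"
    return True, None
```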
diff --git a/renku/command/checks/project.py b/renku/command/checks/project.py index f7b1a6f388..a0af4cb4f7 100644 --- a/renku/command/checks/project.py +++ b/renku/command/checks/project.py @@ -18,28 +18,30 @@ """Checks needed to determine integrity of the project.""" from renku.command.command_builder import inject -from renku.command.echo import WARNING +from renku.command.util import WARNING from renku.core.interface.project_gateway import IProjectGateway from renku.core.util import communication from renku.domain_model.project import Project +from renku.domain_model.project_context import project_context @inject.autoparams("project_gateway") -def check_project_id_group(client, fix, project_gateway: IProjectGateway, **kwargs): +def check_project_id_group(fix, project_gateway: IProjectGateway, **_): """Check that projects in groups have the correct id set. Args: - client: ``LocalClient``. fix: Whether to fix found issues. project_gateway: Injected project gateway. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether project id is valid. """ current_project = project_gateway.get_project() - namespace, name = Project.get_namespace_and_name(use_project_context=True) + namespace, name = Project.get_namespace_and_name( + remote=project_context.remote, repository=project_context.repository + ) if namespace is None or name is None: return True, None diff --git a/renku/command/checks/storage.py b/renku/command/checks/storage.py index b9b1d03cf1..09af347002 100644 --- a/renku/command/checks/storage.py +++ b/renku/command/checks/storage.py @@ -17,16 +17,15 @@ # limitations under the License. """Check for large files in Git history.""" -from renku.command.echo import WARNING +from renku.command.util import WARNING from renku.core.storage import check_external_storage, check_lfs_migrate_info -def check_lfs_info(client, **kwargs): +def check_lfs_info(**_): """Checks if files in history should be in LFS. Args: - client: ``LocalClient`` - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether project structure is valid and string of found problems. diff --git a/renku/command/checks/validate_shacl.py b/renku/command/checks/validate_shacl.py index cc8c4d0514..00a5cd8aa1 100644 --- a/renku/command/checks/validate_shacl.py +++ b/renku/command/checks/validate_shacl.py @@ -16,13 +16,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
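Note on the `check_project_id_group` hunk above: `Project.get_namespace_and_name` no longer accepts `use_project_context=True`; callers pass the pieces explicitly. A sketch of the new call shape, taken directly from that hunk:

```python
from renku.domain_model.project import Project
from renku.domain_model.project_context import project_context

namespace, name = Project.get_namespace_and_name(
    remote=project_context.remote, repository=project_context.repository
)
```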
"""Check KG structure using SHACL.""" + import pyld import yaml from renku.command.command_builder import inject -from renku.command.echo import WARNING from renku.command.schema.dataset import dump_dataset_as_jsonld from renku.command.schema.project import ProjectSchema +from renku.command.util import WARNING from renku.core.interface.dataset_gateway import IDatasetGateway from renku.core.util.shacl import validate_graph from renku.core.util.yaml import NoDatesSafeLoader @@ -53,24 +54,23 @@ def _shacl_graph_to_string(graph): message = "{0}: {1}".format(path, res) else: kind = graph.value(result, sh.sourceConstraintComponent) - focusNode = graph.value(result, sh.focusNode) + focus_node = graph.value(result, sh.focusNode) - if isinstance(focusNode, BNode): - focusNode = "" + if isinstance(focus_node, BNode): + focus_node = "" - message = "{0}: Type: {1}, Node ID: {2}".format(path, kind, focusNode) + message = "{0}: Type: {1}, Node ID: {2}".format(path, kind, focus_node) problems.append(message) return "\n\t".join(problems) -def check_project_structure(client, **kwargs): +def check_project_structure(**_): """Validate project metadata against SHACL. Args: - client: ``LocalClient``. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether project structure is valid and string of found problems. @@ -88,13 +88,12 @@ def check_project_structure(client, **kwargs): @inject.autoparams("dataset_gateway") -def check_datasets_structure(client, dataset_gateway: IDatasetGateway, **kwargs): +def check_datasets_structure(dataset_gateway: IDatasetGateway, **_): """Validate dataset metadata against SHACL. Args: - client: The ``LocalClient``. dataset_gateway(IDatasetGateway): The injected dataset gateway. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple[bool, str]: Tuple of whether structure is valid and of problems that might have been found. diff --git a/renku/command/checks/workflow.py b/renku/command/checks/workflow.py index de78afaa7b..4cde3ab33a 100644 --- a/renku/command/checks/workflow.py +++ b/renku/command/checks/workflow.py @@ -19,20 +19,19 @@ from typing import Optional, Tuple -from renku.command.echo import WARNING +from renku.command.util import WARNING from renku.core.util import communication from renku.domain_model.project_context import project_context from renku.infrastructure.gateway.activity_gateway import reindex_catalog -def check_activity_catalog(client, fix, force, **kwargs) -> Tuple[bool, Optional[str]]: +def check_activity_catalog(fix, force, **_) -> Tuple[bool, Optional[str]]: """Check if the activity-catalog needs to be rebuilt. Args: - client: ``LocalClient``. fix: Whether to fix found issues. force: Whether to force rebuild the activity catalog. - kwargs: keyword arguments. + _: keyword arguments. Returns: Tuple of whether the activity-catalog needs to be rebuilt and a string of found problems. diff --git a/renku/command/clone.py b/renku/command/clone.py index f16cebb421..90885d11b1 100644 --- a/renku/command/clone.py +++ b/renku/command/clone.py @@ -17,16 +17,12 @@ # limitations under the License. 
"""Clone a Renku repo along with all Renku-specific initializations.""" -from renku.command.command_builder import inject from renku.command.command_builder.command import Command -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.domain_model.project_context import project_context -@inject.autoparams() def _project_clone( url, - client_dispatcher: IClientDispatcher, path=None, install_githooks=True, install_mergetool=True, @@ -43,7 +39,6 @@ def _project_clone( Args: url: Git URL to clone. - client_dispatcher(IClientDispatcher): Injected client dispatcher. path: Path to clone to (Default value = None). install_githooks: Whether to install the pre-commit hook or not (Default value = True). install_mergetool: Whether to install the renku metadata git mergetool or not (Default value = True). @@ -60,7 +55,7 @@ def _project_clone( Tuple of cloned ``Repository`` and whether it's a Renku project or not. """ from renku.command.mergetool import setup_mergetool - from renku.core.management.migrate import is_renku_project + from renku.core.migration.migrate import is_renku_project from renku.core.util.git import clone_renku_repository install_lfs = project_context.external_storage_requested @@ -80,16 +75,11 @@ def _project_clone( use_renku_credentials=use_renku_credentials, ) - client_dispatcher.push_client_to_stack(path=repository.path) - with project_context.with_path(repository.path): - try: - project_initialized = is_renku_project() + project_initialized = is_renku_project() - if project_initialized and install_mergetool: - setup_mergetool(with_attributes=False) - finally: - client_dispatcher.pop_client() + if project_initialized and install_mergetool: + setup_mergetool(with_attributes=False) return repository, project_initialized diff --git a/renku/command/command_builder/client_dispatcher.py b/renku/command/command_builder/client_dispatcher.py deleted file mode 100644 index 9b1fd09cd3..0000000000 --- a/renku/command/command_builder/client_dispatcher.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2017-2022 - Swiss Data Science Center (SDSC) -# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and -# Eidgenössische Technische Hochschule Zürich (ETHZ). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Renku client dispatcher.""" -from pathlib import Path -from typing import Union - -from renku.core import errors -from renku.core.interface.client_dispatcher import IClientDispatcher -from renku.core.management.client import LocalClient -from renku.domain_model.project_context import project_context - - -class ClientDispatcher(IClientDispatcher): - """Dispatch currently active client. - - Handles getting current client (LocalClient) and entering/exiting the stack for the client. 
- """ - - def __init__(self): - self.client_stack = [] - - @property - def current_client(self) -> LocalClient: - """Get the currently active client.""" - if len(self.client_stack) == 0: - raise errors.ConfigurationError("No client configured for injection") - - return self.client_stack[-1] - - def push_client_to_stack(self, path: Union[Path, str]) -> LocalClient: - """Create and push a new client to the stack.""" - if isinstance(path, str): - path = Path(path) - - with project_context.with_path(path): - new_client = LocalClient() - self.push_created_client_to_stack(new_client) - - return new_client - - def push_created_client_to_stack(self, client: LocalClient) -> None: - """Push an already created client to the stack.""" - self.client_stack.append(client) - - def pop_client(self) -> None: - """Remove the current client from the stack.""" - self.client_stack.pop() diff --git a/renku/command/command_builder/command.py b/renku/command/command_builder/command.py index ee0f6e8875..de9efcf732 100644 --- a/renku/command/command_builder/command.py +++ b/renku/command/command_builder/command.py @@ -22,9 +22,8 @@ import threading from collections import defaultdict from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union -import click import inject from renku.core import errors @@ -32,9 +31,6 @@ from renku.core.util.git import get_git_path from renku.domain_model.project_context import project_context -if TYPE_CHECKING: - from renku.core.management.client import LocalClient - _LOCAL = threading.local() @@ -82,7 +78,7 @@ def _patched_configure(config: Optional[inject.BinderCallable] = None, bind_in_r bind_in_runtime(bool, optional): Whether to allow binding at runtime (Default value = True). Returns: - Injector: Threadsafe injector with bindings applied. + Injector: Thread-safe injector with bindings applied. """ if getattr(_LOCAL, "injector", None): @@ -136,7 +132,7 @@ def bind(binder): class Command: """Base renku command builder.""" - CLIENT_HOOK_ORDER = 1 + HOOK_ORDER = 1 def __init__(self) -> None: """__init__ of Command.""" @@ -147,8 +143,6 @@ def __init__(self) -> None: self._finalized: bool = False self._track_std_streams: bool = False self._working_directory: Optional[str] = None - self._client: Optional["LocalClient"] = None - self._client_was_created: bool = False def __getattr__(self, name: str) -> Any: """Bubble up attributes of wrapped builders.""" @@ -171,34 +165,14 @@ def _injection_pre_hook(self, builder: "Command", context: dict, *args, **kwargs builder("Command"): Current ``CommandBuilder``. context(dict): Current context dictionary. 
""" - from renku.command.command_builder.client_dispatcher import ClientDispatcher - from renku.core.interface.client_dispatcher import IClientDispatcher - from renku.core.management.client import LocalClient - - dispatcher = ClientDispatcher() - - ctx = click.get_current_context(silent=True) - if ctx is None: - if self._client: - dispatcher.push_created_client_to_stack(self._client) - else: - path = get_git_path(self._working_directory or ".") - project_context.push_path(path) - self._client = dispatcher.push_client_to_stack(path=path) - self._client_was_created = True - ctx = click.Context(click.Command(builder._operation)) # type: ignore - else: - if not self._client: - self._client = ctx.ensure_object(LocalClient) - dispatcher.push_created_client_to_stack(self._client) + path = get_git_path(self._working_directory or ".") + project_context.push_path(path) - context["bindings"] = {IClientDispatcher: dispatcher, "IClientDispatcher": dispatcher} + context["bindings"] = {} context["constructor_bindings"] = {} - context["client_dispatcher"] = dispatcher - context["click_context"] = ctx def _pre_hook(self, builder: "Command", context: dict, *args, **kwargs) -> None: - """Setup local client. + """Setup project. Args: builder("Command"): Current ``CommandBuilder``. @@ -218,10 +192,7 @@ def _post_hook(self, builder: "Command", context: dict, result: "CommandResult", """ remove_injector() - if self._client_was_created: - if self._client: - context["client_dispatcher"].pop_client() - project_context.pop_context() + project_context.pop_context() if result.error: raise result.error @@ -274,7 +245,7 @@ def _bind(binder): try: with context["stack"]: - output = context["click_context"].invoke(self._operation, *args, **kwargs) + output = self._operation(*args, **kwargs) # type: ignore except errors.RenkuException as e: error = e except (Exception, BaseException): @@ -347,9 +318,9 @@ def build(self) -> "Command": """ if not self._operation: raise errors.ConfigurationError("`Command` needs to have a wrapped `command` set") - self.add_injection_pre_hook(self.CLIENT_HOOK_ORDER, self._injection_pre_hook) - self.add_pre_hook(self.CLIENT_HOOK_ORDER, self._pre_hook) - self.add_post_hook(self.CLIENT_HOOK_ORDER, self._post_hook) + self.add_injection_pre_hook(self.HOOK_ORDER, self._injection_pre_hook) + self.add_pre_hook(self.HOOK_ORDER, self._pre_hook) + self.add_post_hook(self.HOOK_ORDER, self._post_hook) self._finalized = True @@ -394,17 +365,6 @@ def track_std_streams(self) -> "Command": return self - @check_finalized - def with_client_path(self, path: Path) -> "Command": - """Set a client from the given path.""" - from renku.core.management.client import LocalClient - from renku.core.util.contexts import chdir - - with chdir(path=path): - self._client = LocalClient() - - return self - @check_finalized def with_git_isolation(self) -> "Command": """Whether to run in git isolation or not.""" diff --git a/renku/command/command_builder/communication.py b/renku/command/command_builder/communication.py index f501bc112a..ba1dc6bf41 100644 --- a/renku/command/command_builder/communication.py +++ b/renku/command/command_builder/communication.py @@ -24,7 +24,7 @@ class Communicator(Command): """Hook for logging and interaction with user.""" - DEFAULT_ORDER = 2 + HOOK_ORDER = 2 def __init__(self, builder: Command, communicator: communication.CommunicationCallback) -> None: """__init__ of Communicator. @@ -48,7 +48,7 @@ def build(self) -> Command: Returns: Command: Finalized version of this command. 
""" - self._builder.add_pre_hook(self.DEFAULT_ORDER, self._pre_hook) - self._builder.add_post_hook(self.DEFAULT_ORDER, self._post_hook) + self._builder.add_pre_hook(self.HOOK_ORDER, self._pre_hook) + self._builder.add_post_hook(self.HOOK_ORDER, self._post_hook) return self._builder.build() diff --git a/renku/command/command_builder/database.py b/renku/command/command_builder/database.py index 8d92ca194a..39874ac0e6 100644 --- a/renku/command/command_builder/database.py +++ b/renku/command/command_builder/database.py @@ -61,8 +61,8 @@ def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) """Create a Database singleton.""" from renku.version import __version__ - if "client_dispatcher" not in context: - raise ValueError("Database builder needs a IClientDispatcher to be set.") + if not project_context.has_context(): + raise ValueError("Database builder needs a ProjectContext to be set.") project_context.push_path(path=self._path or project_context.path, save_changes=self._write) diff --git a/renku/command/command_builder/lock.py b/renku/command/command_builder/lock.py index d82977c60b..fa47deb5f2 100644 --- a/renku/command/command_builder/lock.py +++ b/renku/command/command_builder/lock.py @@ -24,7 +24,7 @@ class ProjectLock(Command): """Builder to get a project wide lock.""" - DEFAULT_ORDER = 5 + HOOK_ORDER = 5 def __init__(self, builder: Command) -> None: """__init__ of ProjectLock.""" @@ -32,8 +32,6 @@ def __init__(self, builder: Command) -> None: def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: """Lock the project.""" - if "client_dispatcher" not in context: - raise ValueError(f"{self.__class__.__name__} builder needs an IClientDispatcher to be set.") if "stack" not in context: raise ValueError(f"{self.__class__.__name__} builder needs a stack to be set.") @@ -42,7 +40,7 @@ def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: @check_finalized def build(self) -> Command: """Build the command.""" - self._builder.add_pre_hook(self.DEFAULT_ORDER, self._pre_hook) + self._builder.add_pre_hook(self.HOOK_ORDER, self._pre_hook) return self._builder.build() diff --git a/renku/command/command_builder/migration.py b/renku/command/command_builder/migration.py index be4746e2d6..fec12bf8fc 100644 --- a/renku/command/command_builder/migration.py +++ b/renku/command/command_builder/migration.py @@ -18,13 +18,13 @@ """Command builder for migrations.""" from renku.command.command_builder.command import Command, check_finalized -from renku.core.management.migrate import check_for_migration +from renku.core.migration.migrate import check_for_migration class RequireMigration(Command): """Builder to check for migrations.""" - DEFAULT_ORDER = 3 + HOOK_ORDER = 3 def __init__(self, builder: Command) -> None: """__init__ of RequireMigration.""" @@ -37,6 +37,6 @@ def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: @check_finalized def build(self) -> Command: """Build the command.""" - self._builder.add_pre_hook(self.DEFAULT_ORDER, self._pre_hook) + self._builder.add_pre_hook(self.HOOK_ORDER, self._pre_hook) return self._builder.build() diff --git a/renku/command/command_builder/repo.py b/renku/command/command_builder/repo.py index d052387b6f..59b22b46ba 100644 --- a/renku/command/command_builder/repo.py +++ b/renku/command/command_builder/repo.py @@ -23,12 +23,13 @@ from renku.command.command_builder.command import Command, CommandResult, check_finalized from renku.core import errors from renku.core.git import 
ensure_clean +from renku.domain_model.project_context import project_context class Commit(Command): """Builder for commands that create a commit.""" - DEFAULT_ORDER = 4 + HOOK_ORDER = 4 def __init__( self, @@ -61,14 +62,16 @@ def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: builder(Command): The current ``CommandBuilder``. context(dict): The current context object. """ - from renku.core.git import prepare_commit + from renku.core.util.git import prepare_commit - if "client_dispatcher" not in context: - raise ValueError("Commit builder needs a IClientDispatcher to be set.") if "stack" not in context: raise ValueError("Commit builder needs a stack to be set.") - self.diff_before = prepare_commit(commit_only=self._commit_filter_paths, skip_staging=self._skip_staging) + self.diff_before = prepare_commit( + repository=project_context.repository, + commit_only=self._commit_filter_paths, + skip_staging=self._skip_staging, + ) def _post_hook(self, builder: Command, context: dict, result: CommandResult, *args, **kwargs): """Hook that commits changes. @@ -78,7 +81,7 @@ def _post_hook(self, builder: Command, context: dict, result: CommandResult, *ar context(dict): The current context object. result(CommandResult): The result of the command execution. """ - from renku.core.git import finalize_commit + from renku.core.util.git import finalize_commit if result.error: # TODO: Cleanup repo @@ -87,6 +90,8 @@ def _post_hook(self, builder: Command, context: dict, result: CommandResult, *ar try: finalize_commit( diff_before=self.diff_before, + repository=project_context.repository, + transaction_id=project_context.transaction_id, commit_only=self._commit_filter_paths, commit_empty=self._commit_if_empty, raise_if_empty=self._raise_if_empty, @@ -103,8 +108,8 @@ def build(self) -> Command: Returns: Command: Finalized version of this command. """ - self._builder.add_pre_hook(self.DEFAULT_ORDER, self._pre_hook) - self._builder.add_post_hook(self.DEFAULT_ORDER, self._post_hook) + self._builder.add_pre_hook(self.HOOK_ORDER, self._pre_hook) + self._builder.add_post_hook(self.HOOK_ORDER, self._post_hook) return self._builder.build() @@ -126,7 +131,7 @@ def with_commit_message(self, message: str) -> Command: class RequireClean(Command): """Builder to check if repo is clean.""" - DEFAULT_ORDER = 4 + HOOK_ORDER = 4 def __init__(self, builder: Command) -> None: """__init__ of RequireClean.""" @@ -139,8 +144,8 @@ def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: builder(Command): Current ``CommandBuilder``. context(dict): Current context. """ - if "client_dispatcher" not in context: - raise ValueError("Commit builder needs a IClientDispatcher to be set.") + if not project_context.has_context(): + raise ValueError("Commit builder needs a ProjectContext to be set.") ensure_clean(ignore_std_streams=not builder._track_std_streams) @@ -151,7 +156,7 @@ def build(self) -> Command: Returns: Command: Finalized version of this command. 
""" - self._builder.add_pre_hook(self.DEFAULT_ORDER, self._pre_hook) + self._builder.add_pre_hook(self.HOOK_ORDER, self._pre_hook) return self._builder.build() @@ -159,7 +164,7 @@ def build(self) -> Command: class Isolation(Command): """Builder to run a command in git isolation.""" - DEFAULT_ORDER = 3 + HOOK_ORDER = 3 def __init__( self, @@ -177,16 +182,10 @@ def _injection_pre_hook(self, builder: Command, context: dict, *args, **kwargs) """ from renku.core.git import prepare_worktree - if "client_dispatcher" not in context: - raise ValueError("Commit builder needs a IClientDispatcher to be set.") - - self.original_client = context["client_dispatcher"].current_client + if not project_context.has_context(): + raise ValueError("Commit builder needs a ProjectContext to be set.") - self.new_client, self.isolation, self.path, self.branch_name = prepare_worktree( - path=None, branch_name=None, commit=None - ) - - context["client_dispatcher"].push_created_client_to_stack(self.new_client) + _, self.isolation, self.path, self.branch_name = prepare_worktree(path=None, branch_name=None, commit=None) def _post_hook(self, builder: Command, context: dict, result: CommandResult, *args, **kwargs): """Hook that commits changes. @@ -197,8 +196,6 @@ def _post_hook(self, builder: Command, context: dict, result: CommandResult, *ar """ from renku.core.git import finalize_worktree - context["client_dispatcher"].pop_client() - try: finalize_worktree( isolation=self.isolation, @@ -219,7 +216,7 @@ def build(self) -> Command: Returns: Command: Finalized version of this command. """ - self._builder.add_injection_pre_hook(self.DEFAULT_ORDER, self._injection_pre_hook) - self._builder.add_post_hook(self.DEFAULT_ORDER, self._post_hook) + self._builder.add_injection_pre_hook(self.HOOK_ORDER, self._injection_pre_hook) + self._builder.add_post_hook(self.HOOK_ORDER, self._post_hook) return self._builder.build() diff --git a/renku/command/cwl_runner.py b/renku/command/cwl_runner.py index 54350c42e6..4019054dd1 100644 --- a/renku/command/cwl_runner.py +++ b/renku/command/cwl_runner.py @@ -24,7 +24,7 @@ import click -from renku.command.echo import progressbar +from renku.command.util import progressbar from renku.core.errors import WorkflowRerunError from renku.core.util.os import expand_directories from renku.domain_model.project_context import project_context diff --git a/renku/command/doctor.py b/renku/command/doctor.py index 02fd859627..81a30f684a 100644 --- a/renku/command/doctor.py +++ b/renku/command/doctor.py @@ -19,9 +19,8 @@ import traceback -from renku.command.command_builder.command import Command, inject -from renku.command.echo import ERROR -from renku.core.interface.client_dispatcher import IClientDispatcher +from renku.command.command_builder.command import Command +from renku.command.util import ERROR DOCTOR_INFO = """\ Please note that the diagnosis report is used to help Renku maintainers with @@ -30,28 +29,24 @@ """ -@inject.autoparams() -def _doctor_check(fix, force, client_dispatcher: IClientDispatcher): +def _doctor_check(fix, force): """Check your system and repository for potential problems. Args: fix: Whether to apply fixes or just check. force: Whether to force-fix some actions. - client_dispatcher(IClientDispatcher): Injected client dispatcher. Returns: Tuple of whether the project is ok or not and list of problems found. 
""" from renku.command import checks - client = client_dispatcher.current_client - is_ok = True problems = [] for check in checks.__all__: try: - ok, problems_ = getattr(checks, check)(client=client, fix=fix, force=force) + ok, problems_ = getattr(checks, check)(fix=fix, force=force) except Exception: ok = False tb = "\n\t".join(traceback.format_exc().split("\n")) diff --git a/renku/command/init.py b/renku/command/init.py index fe103426ab..0396e356d3 100644 --- a/renku/command/init.py +++ b/renku/command/init.py @@ -23,15 +23,12 @@ from typing import TYPE_CHECKING, Dict, List, Optional from uuid import uuid4 -import attr - from renku.command.command_builder.command import Command, inject from renku.command.mergetool import setup_mergetool from renku.core import errors from renku.core.config import set_value from renku.core.constant import DATA_DIR_CONFIG_KEY, RENKU_HOME -from renku.core.git import commit, with_project_metadata, worktree -from renku.core.interface.client_dispatcher import IClientDispatcher +from renku.core.git import with_worktree from renku.core.interface.database_gateway import IDatabaseGateway from renku.core.migration.utils import OLD_METADATA_PATH from renku.core.storage import init_external_storage, storage_installed @@ -39,13 +36,15 @@ FileAction, RenderedTemplate, TemplateAction, - copy_template_to_client, + copy_template_to_project, fetch_templates_source, get_file_actions, set_template_parameters, ) from renku.core.template.usecase import select_template from renku.core.util import communication +from renku.core.util.contexts import with_project_metadata +from renku.core.util.git import with_commit from renku.core.util.os import is_path_empty from renku.domain_model.project import Project from renku.domain_model.project_context import project_context @@ -80,7 +79,7 @@ def create_backup_branch(path): branch_name = f"pre_renku_init_{hexsha}_{uuid4().hex}" break - with worktree( + with with_worktree( branch_name=branch_name, commit=repository.head.commit, merge_args=["--no-ff", "-s", "recursive", "-X", "ours", "--allow-unrelated-histories"], @@ -99,9 +98,7 @@ def init_command(): return Command().command(_init).with_database() -@inject.autoparams() def _init( - ctx, external_storage_requested, path, name, @@ -116,12 +113,10 @@ def _init( data_dir, initial_branch, install_mergetool, - client_dispatcher: IClientDispatcher, ): """Initialize a renku project. Args: - ctx: Current click context. external_storage_requested: Whether or not external storage should be used. path: Path to initialize repository at. name: Name of the project. @@ -136,18 +131,13 @@ def _init( data_dir: Where to store dataset data. initial_branch: Default git branch. install_mergetool(bool): Whether to set up the renku metadata mergetool in the created project. - client_dispatcher(IClientDispatcher): Injected client dispatcher. 
""" - client = client_dispatcher.current_client - if not project_context.external_storage_requested: external_storage_requested = False project_context.push_path(path, save_changes=True) project_context.datadir = data_dir project_context.external_storage_requested = external_storage_requested - ctx.obj = client = attr.evolve(client) - client_dispatcher.push_created_client_to_stack(client) communication.echo("Initializing Git repository...") project_context.repository = init_repository(force=force, user=None, initial_branch=initial_branch) @@ -162,7 +152,9 @@ def _init( if template is None: raise errors.TemplateNotFoundError(f"Couldn't find template with id {template_id}") - namespace, name = Project.get_namespace_and_name(use_project_context=True, name=name) + namespace, name = Project.get_namespace_and_name( + remote=project_context.remote, name=name, repository=project_context.repository + ) name = name or os.path.basename(path.rstrip(os.path.sep)) metadata = dict() @@ -304,11 +296,17 @@ def create_from_template( "'http://schema.org/schemaVersion': '9'" ) - with commit(commit_message=commit_message, commit_only=commit_only, skip_dirty_checks=True): + with with_commit( + repository=project_context.repository, + transaction_id=project_context.transaction_id, + commit_message=commit_message, + commit_only=commit_only, + skip_dirty_checks=True, + ): with with_project_metadata( name=name, namespace=namespace, description=description, custom_metadata=custom_metadata, keywords=keywords ) as project: - copy_template_to_client(rendered_template=rendered_template, project=project, actions=actions) + copy_template_to_project(rendered_template=rendered_template, project=project, actions=actions) if install_mergetool: setup_mergetool() diff --git a/renku/command/migrate.py b/renku/command/migrate.py index 4f4cdaef53..ebc4349f52 100644 --- a/renku/command/migrate.py +++ b/renku/command/migrate.py @@ -16,11 +16,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """Migrate project to the latest Renku version.""" -from renku.command.command_builder import inject + from renku.command.command_builder.command import Command -from renku.core.interface.client_dispatcher import IClientDispatcher -from renku.core.interface.project_gateway import IProjectGateway -from renku.core.template.usecase import check_for_template_update from renku.domain_model.project_context import project_context SUPPORTED_RENKU_PROJECT = 1 @@ -37,19 +34,13 @@ def migrations_check(): return Command().command(_migrations_check).with_database(write=False) -@inject.autoparams() -def _migrations_check(client_dispatcher: IClientDispatcher): +def _migrations_check(): """Check migration status of project. - Args: - client_dispatcher(IClientDispatcher): Injected client dispatcher. - Returns: Dictionary of project migrations, template and dockerfile status. 
""" - from renku.core.management.migrate import is_project_unsupported - - client = client_dispatcher.current_client + from renku.core.migration.migrate import is_project_unsupported core_version, latest_version = _migrations_versions() @@ -57,8 +48,8 @@ def _migrations_check(client_dispatcher: IClientDispatcher): "project_supported": not is_project_unsupported(), "core_renku_version": core_version, "project_renku_version": latest_version, - "core_compatibility_status": _metadata_migration_check(client), - "dockerfile_renku_status": _dockerfile_migration_check(client), + "core_compatibility_status": _metadata_migration_check(), + "dockerfile_renku_status": _dockerfile_migration_check(), "template_status": _template_migration_check(), } @@ -93,6 +84,7 @@ def _template_migration_check(): Returns: Dictionary of template migration status. """ + from renku.core.template.usecase import check_for_template_update try: project = project_context.project @@ -124,17 +116,14 @@ def dockerfile_migration_check(): return Command().command(_dockerfile_migration_check) -def _dockerfile_migration_check(client): +def _dockerfile_migration_check(): """Return Dockerfile migration status. - Args: - client: ``LocalClient``. - Returns: Dictionary of Dockerfile migration status. """ from renku import __version__ - from renku.core.management.migrate import is_docker_update_possible + from renku.core.migration.migrate import is_docker_update_possible automated_dockerfile_update, newer_renku_available, dockerfile_renku_version = is_docker_update_possible() @@ -151,16 +140,13 @@ def metadata_migration_check(): return Command().command(_metadata_migration_check) -def _metadata_migration_check(client): +def _metadata_migration_check(): """Return metadata migration status. - Args: - client: ``LocalClient``. - Returns: Dictionary of metadata migration status. """ - from renku.core.management.migrate import SUPPORTED_PROJECT_VERSION, get_project_version, is_migration_required + from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION, get_project_version, is_migration_required return { "migration_required": is_migration_required(), @@ -169,42 +155,11 @@ def _metadata_migration_check(client): } -def migrate_project(): +def migrate_project_command(): """Return a command to migrate all project's entities.""" - return Command().command(_migrate_project).lock_project().require_clean().with_database(write=True) + from renku.core.migration.migrate import migrate_project - -def _migrate_project( - force_template_update=False, - skip_template_update=False, - skip_docker_update=False, - skip_migrations=False, - strict=False, - preserve_identifiers=False, -): - """Migrate all project's entities. - - Args: - force_template_update: Whether to force update the template (Default value = False). - skip_template_update: Whether to skip updating the template (Default value = False). - skip_docker_update: Whether to skip updating the Dockerfile (Default value = False). - skip_migrations: Whether to skip migrating project metadata (Default value = False). - strict: Whether to fail on errors (Default value = False). - preserve_identifiers: Whether to preserve ids when migrating metadata (Default value = False). - - Returns: - Dictionary of project migration status. 
- """ - from renku.core.management.migrate import migrate - - return migrate( - force_template_update=force_template_update, - skip_template_update=skip_template_update, - skip_docker_update=skip_docker_update, - skip_migrations=skip_migrations, - strict=strict, - preserve_identifiers=preserve_identifiers, - ) + return Command().command(migrate_project).lock_project().require_clean().with_database(write=True) def check_project(): @@ -212,14 +167,14 @@ def check_project(): return Command().command(_check_project).with_database(write=False) -@inject.autoparams() -def _check_project(project_gateway: IProjectGateway): - from renku.core.management.migrate import ( +def _check_project(): + from renku.core.migration.migrate import ( is_docker_update_possible, is_migration_required, is_project_unsupported, is_renku_project, ) + from renku.core.template.usecase import check_for_template_update if not is_renku_project(): return NON_RENKU_REPOSITORY @@ -227,7 +182,7 @@ def _check_project(project_gateway: IProjectGateway): return UNSUPPORTED_PROJECT try: - project_gateway.get_project() + _ = project_context.project except ValueError: return MIGRATION_REQUIRED @@ -245,23 +200,17 @@ def _check_project(project_gateway: IProjectGateway): return status | SUPPORTED_RENKU_PROJECT -@inject.autoparams() -def _check_immutable_template_files(paths, project_gateway: IProjectGateway): +def _check_immutable_template_files(paths): """Check paths and return a list of those that are marked immutable in the project template. Args: paths: Paths to check. - project_gateway(IProjectGateway): Injected project gateway. Returns: List of immutable template files. """ - project = project_gateway.get_project() - - if not project.immutable_template_files: - return [] + immutable_template_files = project_context.project.immutable_template_files or [] - immutable_template_files = project.immutable_template_files or [] return [p for p in paths if str(p) in immutable_template_files] diff --git a/renku/command/move.py b/renku/command/move.py index 62ffd52e11..a72211d48e 100644 --- a/renku/command/move.py +++ b/renku/command/move.py @@ -248,8 +248,8 @@ def _warn_about_dataset_files(files, dataset_gateway: IDatasetGateway): ) -def _show_moved_files(client_path, files): +def _show_moved_files(project_path, files): for path in sorted(files): - src = path.relative_to(client_path) - dst = files[path].relative_to(client_path) + src = path.relative_to(project_path) + dst = files[path].relative_to(project_path) communication.echo(f"{src} -> {dst}") diff --git a/renku/command/remove.py b/renku/command/remove.py index 6a2eeefa40..9b2bd133dd 100644 --- a/renku/command/remove.py +++ b/renku/command/remove.py @@ -45,7 +45,7 @@ def _remove(sources, edit_command, dataset_gateway: IDatasetGateway): repository = project_context.repository def get_relative_path(path): - """Format path as relative to the client path.""" + """Format path as relative to the project path.""" abs_path = os.path.abspath(project_context.path / path) try: return str(Path(abs_path).relative_to(project_context.path)) diff --git a/renku/command/rerun.py b/renku/command/rerun.py index 27f92193cc..6ed9b7dd24 100644 --- a/renku/command/rerun.py +++ b/renku/command/rerun.py @@ -22,7 +22,6 @@ from renku.command.command_builder.command import Command, inject from renku.core import errors from renku.core.interface.activity_gateway import IActivityGateway -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.util.os import get_relative_paths from 
renku.core.workflow.activity import get_activities_until_paths, sort_activities from renku.core.workflow.concrete_execution_graph import ExecutionGraph @@ -47,7 +46,6 @@ def _rerun( paths: List[str], provider: str, config: Optional[str], - client_dispatcher: IClientDispatcher, activity_gateway: IActivityGateway, ): """Rerun a previously run workflow. @@ -59,7 +57,6 @@ def _rerun( paths (List[str]): Output paths to recreate. provider (str): Name of the workflow provider to use for execution. config (str): Path to configuration for the workflow provider. - client_dispatcher(IClientDispatcher): The injected client dispatcher. activity_gateway (IActivityGateway): Injected activity gateway. """ diff --git a/renku/command/rollback.py b/renku/command/rollback.py index 4ccad03824..202c690128 100644 --- a/renku/command/rollback.py +++ b/renku/command/rollback.py @@ -17,15 +17,12 @@ # limitations under the License. """Renku ``status`` command.""" - import os.path import re from itertools import islice from typing import Tuple -from renku.command.command_builder import inject from renku.command.command_builder.command import Command -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.util import communication from renku.domain_model.dataset import Dataset from renku.domain_model.project_context import project_context @@ -40,11 +37,8 @@ def rollback_command(): return Command().command(_rollback_command).require_clean().require_migration().with_database() -@inject.autoparams() -def _rollback_command(client_dispatcher: IClientDispatcher): +def _rollback_command(): """Perform a rollback of the repo.""" - current_client = client_dispatcher.current_client - commits = project_context.repository.iterate_commits(project_context.metadata_path) checkpoint = _prompt_for_checkpoint(commits) @@ -54,7 +48,7 @@ def _rollback_command(client_dispatcher: IClientDispatcher): diff = checkpoint[1].get_changes(commit="HEAD") - confirmation_message, has_changes = _get_confirmation_message(diff, current_client) + confirmation_message, has_changes = _get_confirmation_message(diff) if not has_changes: communication.echo("There would be no changes rolling back to the selected command, exiting.") @@ -65,17 +59,16 @@ def _rollback_command(client_dispatcher: IClientDispatcher): project_context.repository.reset(checkpoint[1], hard=True) -def _get_confirmation_message(diff, client) -> Tuple[str, bool]: +def _get_confirmation_message(diff) -> Tuple[str, bool]: """Create a confirmation message for changes that would be done by a rollback. Args: diff: Diff between two commits. - client: Current ``LocalClient``. Returns: Tuple[str, bool]: Tuple of confirmation message and if there would be changes. """ - modifications = _get_modifications_from_diff(client, diff) + modifications = _get_modifications_from_diff(diff) has_changes = False @@ -112,11 +105,10 @@ def _get_confirmation_message(diff, client) -> Tuple[str, bool]: return confirmation_message, has_changes -def _get_modifications_from_diff(client, diff): +def _get_modifications_from_diff(diff): """Get all modifications from a diff. Args: - client: Current ``LocalClient``. diff: Diff between two commits. 
Returns: @@ -225,7 +217,6 @@ def _prompt_for_checkpoint(commits): prompt = "Checkpoint ([q] to quit)" if more_pages: prompt += ", [m] for more)" - default = "m" else: prompt += ")" selection = communication.prompt("Checkpoint ([q] to quit)", default="q") diff --git a/renku/command/run.py b/renku/command/run.py index f8cffba960..1d61e2dd76 100644 --- a/renku/command/run.py +++ b/renku/command/run.py @@ -199,7 +199,11 @@ def parse_explicit_definition(entries, type): plan = tool.to_plan(name=name, description=description, keywords=keyword) activity = Activity.from_plan( - plan=plan, started_at_time=started_at_time, ended_at_time=ended_at_time, annotations=tool.annotations + plan=plan, + repository=project_context.repository, + started_at_time=started_at_time, + ended_at_time=ended_at_time, + annotations=tool.annotations, ) activity_gateway.add(activity) diff --git a/renku/command/schema/calamus.py b/renku/command/schema/calamus.py index 46c2b2564c..fb29c3d0d1 100644 --- a/renku/command/schema/calamus.py +++ b/renku/command/schema/calamus.py @@ -40,10 +40,9 @@ class JsonLDSchema(CalamusJsonLDSchema): """Base schema class for Renku.""" - def __init__(self, *args, commit=None, client=None, **kwargs): + def __init__(self, *args, commit=None, **kwargs): """Create an instance.""" self._commit = commit - self._client = client super().__init__(*args, **kwargs) def _deserialize(self, *args, **kwargs): @@ -51,14 +50,11 @@ def _deserialize(self, *args, **kwargs): const_args = inspect.signature(self.opts.model) parameters = const_args.parameters.values() - if any(p.name == "client" for p in parameters): - self._add_field_to_data(data, "client", self._client) - if any(p.name == "commit" for p in parameters): if self._commit: self._add_field_to_data(data, "commit", self._commit) elif ( - self._client + project_context.has_context() and "_label" in data and data["_label"] and "@UNCOMMITTED" not in data["_label"] @@ -148,14 +144,12 @@ def _deserialize(self, value, attr, data, **kwargs): class Nested(fields.Nested): - """Nested field that passes along client and commit info.""" + """Nested field that passes along commit info.""" - def __init__(self, *args, propagate_client=True, **kwargs): + def __init__(self, *args, **kwargs): """Init method.""" super().__init__(*args, **kwargs) - self.propagate_client = propagate_client - @property def schema(self): """The nested ``calamus.Schema`` object. 
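In the schema layer the same substitution happens declaratively: `JsonLDSchema` drops its `client` constructor argument and `_deserialize` consults the global `project_context` instead. A sketch of the new guard shown in the `calamus.py` hunk above, wrapped in a helper whose name is hypothetical; `data` is assumed to be the JSON-LD dict being loaded:

```python
from renku.domain_model.project_context import project_context


def needs_commit_resolution(data: dict) -> bool:
    """Mirror of the condition above: resolve a commit from ``_label``
    only when a project context exists and the label is committed."""
    return (
        project_context.has_context()
        and "_label" in data
        and bool(data["_label"])
        and "@UNCOMMITTED" not in data["_label"]
    )
```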
@@ -211,9 +205,6 @@ def schema(self): kwargs = {} - if self.propagate_client and hasattr(self.root, "_client") and JsonLDSchema in schema_class.mro(): - kwargs = {"client": self.root._client} - self._schema["from"][rdf_type] = schema_class( many=False, only=self.only, diff --git a/renku/command/update.py b/renku/command/update.py index 787220e51f..a21635d1c3 100644 --- a/renku/command/update.py +++ b/renku/command/update.py @@ -69,7 +69,7 @@ def _update( starting_activities=modified_activities, paths=paths, ignore_deleted=ignore_deleted, - client_path=project_context.path, + project_path=project_context.path, ) if len(activities) == 0: diff --git a/renku/command/echo.py b/renku/command/util.py similarity index 87% rename from renku/command/echo.py rename to renku/command/util.py index 9282749426..8849e61be1 100644 --- a/renku/command/echo.py +++ b/renku/command/util.py @@ -44,3 +44,13 @@ def echo_via_pager(*args, **kwargs): progressbar = functools.partial( click.progressbar, fill_char=click.style(" ", bg="green"), show_pos=True, item_show_func=lambda x: x ) + + +def yellow_text(text: str) -> str: + """Return text in yellow.""" + return click.style(text, fg="yellow") + + +def red_text(text: str) -> str: + """Return text in red.""" + return click.style(text, fg="red") diff --git a/renku/core/config.py b/renku/core/config.py index be442f4a43..8d16ea893b 100644 --- a/renku/core/config.py +++ b/renku/core/config.py @@ -15,7 +15,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Client for handling a configuration.""" +"""Configuration management.""" import configparser import os diff --git a/renku/core/dataset/context.py b/renku/core/dataset/context.py index 6efc6aaf8e..3748e58b1d 100644 --- a/renku/core/dataset/context.py +++ b/renku/core/dataset/context.py @@ -17,8 +17,6 @@ # limitations under the License. 
"""Dataset context managers.""" -import contextlib -import time from pathlib import Path from typing import Optional @@ -78,15 +76,3 @@ def __exit__(self, exc_type, exc_value, traceback): self.datasets_provenance = DatasetsProvenance() self.datasets_provenance.add_or_update(self.dataset, creator=self.creator) project_context.database.commit() - - -@contextlib.contextmanager -def wait_for(delay: float): - """Make sure that at least ``delay`` seconds are passed during the execution of the wrapped code block.""" - start = time.time() - - yield - - exec_time = time.time() - start - if exec_time < delay: - time.sleep(delay - exec_time) diff --git a/renku/core/dataset/dataset.py b/renku/core/dataset/dataset.py index 28c6fc52ca..42b6999d56 100644 --- a/renku/core/dataset/dataset.py +++ b/renku/core/dataset/dataset.py @@ -36,7 +36,6 @@ from renku.core.dataset.providers.s3 import S3Credentials from renku.core.dataset.request_model import ImageRequestModel from renku.core.dataset.tag import get_dataset_by_tag, prompt_access_token, prompt_tag_selection -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.interface.dataset_gateway import IDatasetGateway from renku.core.interface.storage import IStorageFactory from renku.core.storage import check_external_storage, pull_paths_from_storage, track_paths_in_storage @@ -67,7 +66,6 @@ from renku.infrastructure.immutable import DynamicProxy if TYPE_CHECKING: - from renku.core.management.client import LocalClient from renku.infrastructure.repository import Repository @@ -376,17 +374,14 @@ def remove_dataset(name): datasets_provenance.remove(dataset=dataset) -@inject.autoparams() -def export_dataset(name, provider_name, tag, client_dispatcher: IClientDispatcher, **kwargs): +def export_dataset(name, provider_name, tag, **kwargs): """Export data to 3rd party provider. Args: name: Name of dataset to export. provider_name: Provider to use for export. tag: Dataset tag from which to export. - client_dispatcher(IClientDispatcher): Injected client dispatcher. """ - client = client_dispatcher.current_client datasets_provenance = DatasetsProvenance() provider_name = provider_name.lower() @@ -433,7 +428,7 @@ def export_dataset(name, provider_name, tag, client_dispatcher: IClientDispatche exporter.set_access_token(access_token) try: - destination = exporter.export(client=client) + destination = exporter.export() except errors.AuthenticationError: remove_value(provider_name, config_key_secret, global_only=True) raise @@ -474,8 +469,8 @@ def confirm_download(files): communication.echo(tabulate(files, headers=headers, floatfmt=".2f")) communication.confirm("Do you wish to download this version?", abort=True, warning=True) - def calculate_total_size(files): - total_size = 0.0 + def calculate_total_size(files) -> int: + total_size = 0 for file in files: if file.filesize is not None: total_size += file.filesize @@ -568,7 +563,6 @@ def update_datasets( check_data_directory: bool, update_all: bool, dry_run: bool, - client_dispatcher: IClientDispatcher, dataset_gateway: IDatasetGateway, ) -> Tuple[List[DatasetViewModel], List[DatasetFileViewModel]]: """Update dataset files. @@ -586,7 +580,6 @@ def update_datasets( check_data_directory(bool): Whether to check the dataset's data directory for new files. update_all(bool): Whether to update all datasets. dry_run(bool): Whether to return a preview of what would be updated. - client_dispatcher(IClientDispatcher): Injected client dispatcher. dataset_gateway(IDatasetGateway): Injected dataset gateway. 
""" from renku.core.dataset.providers.renku import RenkuProvider @@ -594,8 +587,6 @@ def update_datasets( if not update_all and not names and not include and not exclude and not dry_run: raise errors.ParameterError("No update criteria is specified") - client = client_dispatcher.current_client - imported_dataset_updates: List[Dataset] = [] all_datasets = dataset_gateway.get_all_active_datasets() @@ -710,7 +701,7 @@ def update_datasets( deleted_files: List[DynamicProxy] = [] if external_files and not no_external: - updated = update_external_files(client, external_files, dry_run=dry_run) + updated = update_external_files(external_files, dry_run=dry_run) updated_files.extend(updated) if git_files and not no_remote: @@ -736,7 +727,7 @@ def update_datasets( repository.add(*file_paths, force=True) repository.add(project_context.pointers_path, force=True) - _update_datasets_files_metadata(client, updated_files, deleted_files, delete) + _update_datasets_files_metadata(updated_files, deleted_files, delete) message = f"Updated {len(updated_files)} files" if delete: @@ -780,16 +771,12 @@ def show_dataset(name: str, tag: Optional[str] = None): return DatasetDetailsJson().dump(dataset) -@inject.autoparams("client_dispatcher") -def add_datadir_files_to_dataset(dataset: Dataset, client_dispatcher: IClientDispatcher) -> None: +def add_datadir_files_to_dataset(dataset: Dataset) -> None: """Add all files in a datasets data directory to the dataset. Args: - client_dispatcher(IClientDispatcher): The client dispatcher. dataset(Dataset): The dataset to add data dir files to. """ - client = client_dispatcher.current_client - datadir = get_safe_relative_path(dataset.get_datadir(), project_context.path) if datadir.exists(): @@ -798,7 +785,7 @@ def add_datadir_files_to_dataset(dataset: Dataset, client_dispatcher: IClientDis files: List[Path] = [] for file in get_files(datadir): files.append(file) - dataset_files.append(DatasetFile.from_path(client=client, path=file, source=file)) + dataset_files.append(DatasetFile.from_path(path=file, source=file)) if not dataset_files: return @@ -873,9 +860,8 @@ def update_dataset_custom_metadata(dataset: Dataset, custom_metadata: Optional[D dataset.annotations = existing_metadata -@inject.autoparams("client_dispatcher", "dataset_gateway") +@inject.autoparams("dataset_gateway") def move_files( - client_dispatcher: IClientDispatcher, dataset_gateway: IDatasetGateway, files: Dict[Path, Path], to_dataset_name: Optional[str] = None, @@ -883,13 +869,10 @@ def move_files( """Move files and their metadata from one or more datasets to a target dataset. Args: - client_dispatcher(IClientDispatcher): Injected client dispatcher. dataset_gateway(IDatasetGateway):Injected dataset gateway. 
files(Dict[Path, Path]): Files to move to_dataset_name(Optional[str], optional): Target dataset (Default value = None) """ - client = client_dispatcher.current_client - datasets = [d.copy() for d in dataset_gateway.get_all_active_datasets()] to_dataset: Optional[Dataset] = None @@ -905,7 +888,7 @@ def move_files( src = src.relative_to(project_context.path) dst = dst.relative_to(project_context.path) # NOTE: Files are moved at this point, so, we can use dst - new_dataset_file = DatasetFile.from_path(client, dst) + new_dataset_file = DatasetFile.from_path(dst) for dataset in datasets: removed = dataset.unlink_file(src, missing_ok=True) @@ -999,18 +982,11 @@ def update_dataset_local_files( return updated_files, deleted_files, new_files -def _update_datasets_files_metadata( - client: "LocalClient", - updated_files: List[DynamicProxy], - deleted_files: List[DynamicProxy], - delete: bool, -): +def _update_datasets_files_metadata(updated_files: List[DynamicProxy], deleted_files: List[DynamicProxy], delete: bool): modified_datasets = {} for file in updated_files: - new_file = DatasetFile.from_path( - client=client, path=file.entity.path, based_on=file.based_on, source=file.source - ) + new_file = DatasetFile.from_path(path=file.entity.path, based_on=file.based_on, source=file.source) modified_datasets[file.dataset.name] = file.dataset file.dataset.add_or_update_files(new_file) @@ -1024,27 +1000,21 @@ def _update_datasets_files_metadata( datasets_provenance.add_or_update(dataset, creator=get_git_user(repository=project_context.repository)) -@inject.autoparams("client_dispatcher") def update_dataset_git_files( - client_dispatcher: IClientDispatcher, files: List[DynamicProxy], ref: str, delete: bool, dry_run: bool + files: List[DynamicProxy], ref: Optional[str], delete: bool, dry_run: bool ) -> Tuple[List[DynamicProxy], List[DynamicProxy]]: """Update files and dataset metadata according to their remotes. Args: - client_dispatcher(IClientDispatcher): Injected client dispatcher. files(List[DynamicProxy]): List of files to be updated. - ref(str): Reference to use for update. + ref(Optional[str]): Reference to use for update. delete(bool, optional): Indicates whether to delete files or not (Default value = False). dry_run(bool): Whether to perform update or only print changes. Returns: Tuple[List[DynamicProxy], List[DynamicProxy]]: Tuple of updated and deleted file records. 
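[review note] The body that follows caches one `Repository` per remote URL instead of a `(Repository, LocalClient)` pair. The memoization shape as a runnable toy; the clone helper here is a fake stand-in for renku's `clone_repository`, and the cache path scheme is illustrative:

from pathlib import Path
from typing import Dict


def clone_repository(url: str, path: Path) -> object:  # fake stand-in
    return object()


visited_repos: Dict[str, object] = {}


def get_remote_repository(url: str, cache_root: Path = Path("/tmp/renku-cache")) -> object:
    """Clone each remote at most once per update run, then reuse it."""
    if url not in visited_repos:
        visited_repos[url] = clone_repository(url, cache_root / url.replace("/", "_"))
    return visited_repos[url]


assert get_remote_repository("https://example.com/a.git") is get_remote_repository("https://example.com/a.git")
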
""" - from renku.core.management.client import LocalClient - - client = client_dispatcher.current_client - - visited_repos: Dict[str, Tuple["Repository", LocalClient]] = {} + visited_repos: Dict[str, "Repository"] = {} updated_files: List[DynamicProxy] = [] deleted_files: List[DynamicProxy] = [] @@ -1060,15 +1030,12 @@ def update_dataset_git_files( based_on = file.based_on url = based_on.url if url in visited_repos: - remote_repository, remote_client = visited_repos[url] + remote_repository = visited_repos[url] else: communication.echo(msg="Cloning remote repository...") - remote_repository = clone_repository( - url=url, path=get_cache_directory_for_repository(client=client, url=url), checkout_revision=ref - ) - with project_context.with_path(remote_repository.path): - remote_client = LocalClient() - visited_repos[url] = remote_repository, remote_client + path = get_cache_directory_for_repository(url=url) + remote_repository = clone_repository(url=url, path=path, checkout_revision=ref) + visited_repos[url] = remote_repository checksum = remote_repository.get_object_hash(path=based_on.path, revision="HEAD") found = checksum is not None @@ -1086,9 +1053,9 @@ def update_dataset_git_files( if not dry_run: # Fetch file if it is tracked by Git LFS pull_paths_from_storage(remote_repository, remote_repository.path / based_on.path) - if is_external_file(path=src, client_path=remote_repository.path): + if is_external_file(path=src, project_path=remote_repository.path): delete_dataset_file(dst, follow_symlinks=True) - create_external_file(client=client, target=src.resolve(), path=dst) + create_external_file(target=src.resolve(), path=dst) else: shutil.copy(src, dst) file.based_on = RemoteEntity( @@ -1105,11 +1072,10 @@ def update_dataset_git_files( return updated_files, deleted_files -def update_external_files(client: "LocalClient", records: List[DynamicProxy], dry_run: bool) -> List[DynamicProxy]: +def update_external_files(records: List[DynamicProxy], dry_run: bool) -> List[DynamicProxy]: """Update files linked to external storage. Args: - client("LocalClient"): The ``LocalClient``. records(List[DynamicProxy]): File records to update. dry_run(bool): Whether to return a preview of what would be updated. """ @@ -1118,7 +1084,7 @@ def update_external_files(client: "LocalClient", records: List[DynamicProxy], dr for file in records: if file.is_external: try: - updated, checksum = is_external_file_updated(client_path=project_context.path, path=file.entity.path) + updated, checksum = is_external_file_updated(project_path=project_context.path, path=file.entity.path) except errors.ExternalFileNotFound as e: if not dry_run: raise @@ -1127,15 +1093,14 @@ def update_external_files(client: "LocalClient", records: List[DynamicProxy], dr if updated: if not dry_run: - update_external_file(client=client, path=file.entity.path, checksum=checksum) + update_external_file(path=file.entity.path, checksum=checksum) updated_files.append(file) return updated_files -@inject.autoparams("client_dispatcher", "dataset_gateway") +@inject.autoparams("dataset_gateway") def filter_dataset_files( - client_dispatcher: IClientDispatcher, dataset_gateway: IDatasetGateway, names: Optional[List[str]] = None, tag: Optional[str] = None, @@ -1149,7 +1114,6 @@ def filter_dataset_files( """Filter dataset files by specified filters. Args: - client_dispatcher(IClientDispatcher): Injected client dispatcher. dataset_gateway(IDatasetGateway):Injected dataset gateway. 
names(Optional[List[str]]): Filter by specified dataset names (Default value = None). tag(Optional[str]): Filter by specified tag (Default value = None). @@ -1179,8 +1143,6 @@ def should_include(filepath: Path) -> bool: return True - client = client_dispatcher.current_client - if isinstance(creators, str): creators_set = set(creators.split(",")) elif isinstance(creators, list) or isinstance(creators, tuple): @@ -1220,7 +1182,6 @@ def should_include(filepath: Path) -> bool: record = DynamicProxy(file) record.dataset = dataset - record.client = client records.append(record) if not check_data_directory: @@ -1232,9 +1193,8 @@ def should_include(filepath: Path) -> bool: file_path = get_safe_relative_path(current_folder / current_file, project_context.path) if should_include(file_path) and not dataset.find_file(file_path): # New file in dataset folder - record = DynamicProxy(DatasetFile.from_path(client, file_path)) + record = DynamicProxy(DatasetFile.from_path(file_path)) record.dataset = dataset - record.client = client records.append(record) if unused_names: @@ -1244,19 +1204,15 @@ def should_include(filepath: Path) -> bool: return sorted(records, key=lambda r: r.date_added) -@inject.autoparams("client_dispatcher", "storage_factory") -def pull_external_data( - name: str, client_dispatcher: IClientDispatcher, storage_factory: IStorageFactory, location: Optional[Path] = None -) -> None: +@inject.autoparams("storage_factory") +def pull_external_data(name: str, storage_factory: IStorageFactory, location: Optional[Path] = None) -> None: """Pull/copy data for an external storage to a dataset's data directory or a specified location. Args: name(str): Name of the dataset - client_dispatcher(IClientDispatcher): Injected client dispatcher. storage_factory(IStorageFactory):Injected storage factory. location(Optional[Path]): A directory to copy data to (Default value = None). 
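[review note] The common thread across these dataset hunks: helpers stop receiving a `client` (or injecting `IClientDispatcher`) and read the project root from the global `project_context` instead. A simplified, self-contained sketch of that pattern; this `ProjectContext` class is a stand-in, not renku's actual implementation:

from pathlib import Path


class ProjectContext:  # stand-in for renku.domain_model.project_context
    """Process-wide holder of the current project path."""

    def __init__(self, path: Path):
        self.path = path


project_context = ProjectContext(Path.cwd())


def relative_to_project(path: Path) -> Path:
    """Helpers resolve against project_context instead of a passed-in client."""
    # raises ValueError if ``path`` lies outside the project, as in the real helpers
    return path.resolve().relative_to(project_context.path.resolve())
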
""" - client = client_dispatcher.current_client datasets_provenance = DatasetsProvenance() dataset = datasets_provenance.get_by_name(name=name, strict=True) @@ -1320,7 +1276,7 @@ def pull_external_data( store_dataset_data_location(dataset=dataset, location=location) if updated_files: - _update_datasets_files_metadata(client, updated_files=updated_files, deleted_files=[], delete=False) + _update_datasets_files_metadata(updated_files=updated_files, deleted_files=[], delete=False) def store_dataset_data_location(dataset: Dataset, location: Optional[Path]) -> None: @@ -1339,7 +1295,7 @@ def read_dataset_data_location(dataset: Dataset) -> Optional[str]: return get_value(section="dataset-locations", key=dataset.name, config_filter=ConfigFilter.LOCAL_ONLY) -@inject.autoparams("client_dispatcher", "storage_factory") +@inject.autoparams("storage_factory") def mount_external_storage( name: str, existing: Optional[Path], diff --git a/renku/core/dataset/dataset_add.py b/renku/core/dataset/dataset_add.py index e257e1a757..220e6ad09c 100644 --- a/renku/core/dataset/dataset_add.py +++ b/renku/core/dataset/dataset_add.py @@ -33,7 +33,6 @@ from renku.core.storage import check_external_storage, track_paths_in_storage from renku.core.util import communication, requests from renku.core.util.dataset import check_url -from renku.core.util.dispatcher import get_client from renku.core.util.git import get_git_user from renku.core.util.os import delete_dataset_file, get_files, get_relative_path from renku.domain_model.dataset import Dataset, DatasetFile @@ -41,7 +40,6 @@ if TYPE_CHECKING: from renku.core.dataset.providers.models import DatasetAddMetadata - from renku.core.management.client import LocalClient def add_to_dataset( @@ -63,7 +61,6 @@ def add_to_dataset( **kwargs, ) -> Dataset: """Import the data into the data directory.""" - client = get_client() repository = project_context.repository sources = sources or [] @@ -92,7 +89,6 @@ def add_to_dataset( urls.append(str(file)) files = _download_files( - client=client, urls=urls, dataset=dataset, importer=importer, @@ -122,7 +118,7 @@ def add_to_dataset( if not overwrite: files, files_to_commit = _check_existing_files(dataset, files_to_commit, files) - move_files_to_dataset(client, files) + move_files_to_dataset(files) # Track non-symlinks in LFS if check_external_storage(): @@ -142,7 +138,7 @@ def add_to_dataset( return dataset - dataset_files = _generate_dataset_files(client, dataset, files, clear_files_before) + dataset_files = _generate_dataset_files(dataset, files, clear_files_before) dataset.add_or_update_files(dataset_files) datasets_provenance = DatasetsProvenance() @@ -164,7 +160,6 @@ def add_to_dataset( def _download_files( *, - client: "LocalClient", urls: List[str], importer: Optional[ImporterApi] = None, dataset: Dataset, @@ -182,7 +177,7 @@ def _download_files( ) if importer: - return importer.download_files(client=client, destination=destination, extract=extract) + return importer.download_files(destination=destination, extract=extract) if len(urls) == 0: raise errors.ParameterError("No URL is specified") @@ -200,7 +195,6 @@ def _download_files( provider = ProviderFactory.get_add_provider(uri=url) new_files = provider.add( - client=client, uri=url, destination=destination, revision=revision, @@ -292,7 +286,7 @@ def _check_existing_files(dataset: Dataset, files_to_commit: Set[str], files: Li return files, files_to_commit -def move_files_to_dataset(client: "LocalClient", files: List["DatasetAddMetadata"]): +def move_files_to_dataset(files: 
List["DatasetAddMetadata"]): """Copy/Move files into a dataset's directory.""" for file in files: if not file.has_action: @@ -307,20 +301,16 @@ def move_files_to_dataset(client: "LocalClient", files: List["DatasetAddMetadata elif file.action == DatasetAddAction.MOVE: shutil.move(file.source, file.destination, copy_function=shutil.copy) # type: ignore elif file.action == DatasetAddAction.SYMLINK: - create_external_file(client=client, target=file.source, path=file.destination) + create_external_file(target=file.source, path=file.destination) else: raise errors.OperationError(f"Invalid action {file.action}") -def _generate_dataset_files( - client: "LocalClient", dataset: Dataset, files: List["DatasetAddMetadata"], clear_files_before: bool = False -): +def _generate_dataset_files(dataset: Dataset, files: List["DatasetAddMetadata"], clear_files_before: bool = False): """Generate DatasetFile entries from file dict.""" dataset_files = [] for file in files: - dataset_file = DatasetFile.from_path( - client=client, path=file.entity_path, source=file.url, based_on=file.based_on - ) + dataset_file = DatasetFile.from_path(path=file.entity_path, source=file.url, based_on=file.based_on) dataset_files.append(dataset_file) if clear_files_before: diff --git a/renku/core/dataset/pointer_file.py b/renku/core/dataset/pointer_file.py index f493372453..79c3aea71c 100644 --- a/renku/core/dataset/pointer_file.py +++ b/renku/core/dataset/pointer_file.py @@ -20,18 +20,15 @@ import os import uuid from pathlib import Path -from typing import TYPE_CHECKING, Optional, Tuple, Union, cast +from typing import Optional, Tuple, Union, cast from renku.core import errors from renku.core.util.os import is_subpath from renku.domain_model.project_context import project_context from renku.infrastructure.repository import Repository -if TYPE_CHECKING: - from renku.core.management.client import LocalClient - -def create_pointer_file(client: "LocalClient", target: Union[str, Path], checksum: str = None): +def create_pointer_file(target: Union[str, Path], checksum: str = None): """Create a new pointer file.""" target = Path(target).resolve() @@ -57,9 +54,9 @@ def create_pointer_file(client: "LocalClient", target: Union[str, Path], checksu return path -def is_external_file_updated(client_path: Path, path: Union[Path, str]) -> Tuple[bool, str]: +def is_external_file_updated(project_path: Path, path: Union[Path, str]) -> Tuple[bool, str]: """Check if an update to an external file is available.""" - pointer_file = get_pointer_file(client_path, path) + pointer_file = get_pointer_file(project_path=project_path, path=path) try: target = pointer_file.resolve(strict=True) @@ -78,30 +75,30 @@ def is_external_file_updated(client_path: Path, path: Union[Path, str]) -> Tuple return updated, new_checksum -def update_external_file(client: "LocalClient", path: Union[Path, str], checksum: Optional[str]): +def update_external_file(path: Union[Path, str], checksum: Optional[str]): """Delete existing external file and create a new one.""" - pointer_file = get_pointer_file(project_context.path, path) + pointer_file = get_pointer_file(project_path=project_context.path, path=path) target = pointer_file.resolve() os.remove(pointer_file) absolute_path = project_context.path / path os.remove(absolute_path) - create_external_file(client=client, target=target, path=absolute_path, checksum=checksum) + create_external_file(target=target, path=absolute_path, checksum=checksum) -def create_external_file(client: "LocalClient", target: Path, path: Union[Path, str], 
checksum: str = None): +def create_external_file(target: Path, path: Union[Path, str], checksum: str = None): """Create a new external file.""" try: - pointer_file = create_pointer_file(client, target=target, checksum=checksum) + pointer_file = create_pointer_file(target=target, checksum=checksum) relative = os.path.relpath(pointer_file, Path(path).parent) os.symlink(relative, path) except OSError as e: raise errors.OperationError("Could not create symbolic link") from e -def get_pointer_file(client_path: Path, path: Union[Path, str]) -> Path: +def get_pointer_file(project_path: Path, path: Union[Path, str]) -> Path: """Return pointer file from an external file.""" - absolute_path = client_path / path + absolute_path = project_path / path link = absolute_path.parent / os.readlink(absolute_path) - return client_path / link + return project_path / link diff --git a/renku/core/dataset/providers/api.py b/renku/core/dataset/providers/api.py index 48c0c69fc8..59af0da47e 100644 --- a/renku/core/dataset/providers/api.py +++ b/renku/core/dataset/providers/api.py @@ -34,7 +34,6 @@ ProviderDatasetFile, ProviderParameter, ) - from renku.core.management.client import LocalClient from renku.domain_model.dataset import Dataset, DatasetTag @@ -103,7 +102,7 @@ def supports_import() -> bool: return False @staticmethod - def add(client: "LocalClient", uri: str, destination: Path, **kwargs) -> List["DatasetAddMetadata"]: + def add(uri: str, destination: Path, **kwargs) -> List["DatasetAddMetadata"]: """Add files from a URI to a dataset.""" raise NotImplementedError @@ -200,7 +199,7 @@ def is_version_equal_to(self, dataset: Any) -> bool: return self.version == getattr(dataset, "version", object()) @abc.abstractmethod - def download_files(self, client: "LocalClient", destination: Path, extract: bool) -> List["DatasetAddMetadata"]: + def download_files(self, destination: Path, extract: bool) -> List["DatasetAddMetadata"]: """Download dataset files from the remote provider.""" raise NotImplementedError diff --git a/renku/core/dataset/providers/dataverse.py b/renku/core/dataset/providers/dataverse.py index 3657c20036..1d3b7bb4b8 100644 --- a/renku/core/dataset/providers/dataverse.py +++ b/renku/core/dataset/providers/dataverse.py @@ -354,7 +354,7 @@ def get_access_token_url(self): """Endpoint for creation of access token.""" return urllib.parse.urljoin(self._server_url, "/dataverseuser.xhtml?selectTab=apiTokenTab") - def export(self, client=None, **kwargs): + def export(self, **kwargs): """Execute export process.""" from renku.domain_model.dataset import get_file_path_in_dataset diff --git a/renku/core/dataset/providers/doi.py b/renku/core/dataset/providers/doi.py index 5b0cb3f8a7..5d635fe3f1 100644 --- a/renku/core/dataset/providers/doi.py +++ b/renku/core/dataset/providers/doi.py @@ -131,7 +131,7 @@ def is_latest_version(self) -> bool: """Check if record is at last possible version.""" return True - def download_files(self, client, destination: Path, extract: bool): + def download_files(self, destination: Path, extract: bool): """Download dataset files from the remote provider.""" raise NotImplementedError diff --git a/renku/core/dataset/providers/git.py b/renku/core/dataset/providers/git.py index 1c1d053ee5..41f22f4377 100644 --- a/renku/core/dataset/providers/git.py +++ b/renku/core/dataset/providers/git.py @@ -36,7 +36,6 @@ if TYPE_CHECKING: from renku.core.dataset.providers.models import DatasetAddMetadata, ProviderParameter - from renku.core.management.client import LocalClient class 
GitProvider(ProviderApi): @@ -80,7 +79,6 @@ def get_add_parameters() -> List["ProviderParameter"]: @staticmethod def add( - client: "LocalClient", uri: str, destination: Path, *, @@ -96,7 +94,7 @@ def add( remote_repository = clone_repository( url=uri, - path=get_cache_directory_for_repository(client=client, url=uri), + path=get_cache_directory_for_repository(url=uri), checkout_revision=revision, depth=None, clean=True, diff --git a/renku/core/dataset/providers/local.py b/renku/core/dataset/providers/local.py index 708ee57d54..7566fa0292 100644 --- a/renku/core/dataset/providers/local.py +++ b/renku/core/dataset/providers/local.py @@ -34,7 +34,6 @@ if TYPE_CHECKING: from renku.core.dataset.providers.models import DatasetAddMetadata, ProviderParameter - from renku.core.management.client import LocalClient from renku.domain_model.dataset import Dataset, DatasetTag @@ -106,7 +105,6 @@ def get_export_parameters() -> List["ProviderParameter"]: @staticmethod def add( - client: "LocalClient", uri: str, destination: Path, *, @@ -257,7 +255,7 @@ def get_access_token_url(self): """Endpoint for creation of access token.""" return "" - def export(self, client=None, **kwargs) -> str: + def export(self, **kwargs) -> str: """Execute entire export process.""" from renku.command.schema.dataset import dump_dataset_as_jsonld from renku.core.util.yaml import write_yaml diff --git a/renku/core/dataset/providers/models.py b/renku/core/dataset/providers/models.py index 3d08b75ed6..2f99f2a96b 100644 --- a/renku/core/dataset/providers/models.py +++ b/renku/core/dataset/providers/models.py @@ -52,9 +52,9 @@ def has_action(self) -> bool: """Returns if file action is not NONE.""" return self.action != DatasetAddAction.NONE - def get_absolute_commit_path(self, client_path: Path) -> str: + def get_absolute_commit_path(self, project_path: Path) -> str: """Return path of the file in the repository.""" - return os.path.join(client_path, self.entity_path) + return os.path.join(project_path, self.entity_path) class ProviderParameter(NamedTuple): diff --git a/renku/core/dataset/providers/olos.py b/renku/core/dataset/providers/olos.py index 71e62b7317..16071a7cfa 100644 --- a/renku/core/dataset/providers/olos.py +++ b/renku/core/dataset/providers/olos.py @@ -103,7 +103,7 @@ def get_access_token_url(self): """Endpoint for creation of access token.""" return urllib.parse.urljoin(self._server_url, "portal") - def export(self, client=None, **kwargs): + def export(self, **kwargs): """Execute export process.""" from renku.domain_model.dataset import get_file_path_in_dataset diff --git a/renku/core/dataset/providers/renku.py b/renku/core/dataset/providers/renku.py index dd68c636a5..9a97cb9435 100644 --- a/renku/core/dataset/providers/renku.py +++ b/renku/core/dataset/providers/renku.py @@ -24,12 +24,10 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, List, Optional -from renku.command.command_builder.command import inject from renku.command.login import read_renku_token from renku.core import errors from renku.core.dataset.datasets_provenance import DatasetsProvenance from renku.core.dataset.providers.api import ImporterApi, ProviderApi, ProviderPriority -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.storage import pull_paths_from_storage from renku.core.util import communication from renku.core.util.git import clone_renku_repository, get_cache_directory_for_repository, get_file_size @@ -39,7 +37,6 @@ if TYPE_CHECKING: from renku.core.dataset.providers.models import 
DatasetAddMetadata, ProviderDataset, ProviderParameter - from renku.core.management.client import LocalClient from renku.domain_model.dataset import Dataset @@ -254,7 +251,6 @@ def __init__( self._project_url = None self._remote_repository = None - self._remote_client = None self._remote_path = None def fetch_provider_dataset(self) -> "ProviderDataset": @@ -263,7 +259,7 @@ def fetch_provider_dataset(self) -> "ProviderDataset": assert self._provider_dataset is not None, "Dataset wasn't fetched." return self._provider_dataset - def download_files(self, client: "LocalClient", destination: Path, extract: bool) -> List["DatasetAddMetadata"]: + def download_files(self, destination: Path, extract: bool) -> List["DatasetAddMetadata"]: """Download dataset files from the remote provider.""" from renku.core.dataset.providers.models import DatasetAddAction, DatasetAddMetadata from renku.domain_model.dataset import RemoteEntity @@ -305,8 +301,8 @@ def add_file(src_entity_path: str, content_path: Path, checksum) -> None: if already_copied: return - if is_external_file(path=src_entity_path, client_path=remote_repository.path): # type: ignore - source = (remote_repository.path / src_entity_path).resolve() # type: ignore + if is_external_file(path=src_entity_path, project_path=remote_repository.path): + source = (remote_repository.path / src_entity_path).resolve() action = DatasetAddAction.SYMLINK else: source = content_path @@ -440,14 +436,11 @@ def datadir_exists(self): """Whether the dataset data directory exists (might be missing in git if empty).""" return (self._remote_path / self.provider_dataset.get_datadir()).exists() - @inject.autoparams() - def _fetch_dataset(self, client_dispatcher: IClientDispatcher): + def _fetch_dataset(self): from renku.core.dataset.providers.models import ProviderDataset, ProviderDatasetFile - from renku.core.management.client import LocalClient from renku.domain_model.dataset import Url remote_repository = None - client = client_dispatcher.current_client parsed_uri = urllib.parse.urlparse(self.uri) @@ -458,7 +451,7 @@ def _fetch_dataset(self, client_dispatcher: IClientDispatcher): try: remote_repository = clone_renku_repository( url=url, - path=get_cache_directory_for_repository(client=client, url=url), + path=get_cache_directory_for_repository(url=url), gitlab_token=self._gitlab_token, deployment_hostname=parsed_uri.netloc, depth=None, @@ -476,39 +469,34 @@ def _fetch_dataset(self, client_dispatcher: IClientDispatcher): with project_context.with_path(remote_repository.path): self._remote_path = project_context.path - self._remote_client = LocalClient() - client_dispatcher.push_created_client_to_stack(self._remote_client) - try: - self._migrate_project() - self._remote_repository = remote_repository + self._migrate_project() + self._remote_repository = remote_repository - datasets_provenance = DatasetsProvenance() + datasets_provenance = DatasetsProvenance() - dataset = datasets_provenance.get_by_name(self._name) - if not dataset: - raise errors.ParameterError(f"Cannot find dataset '{self._name}' in project '{self._project_url}'") + dataset = datasets_provenance.get_by_name(self._name) + if not dataset: + raise errors.ParameterError(f"Cannot find dataset '{self._name}' in project '{self._project_url}'") - if self._tag: - tags = datasets_provenance.get_all_tags(dataset=dataset) - tag = next((t for t in tags if t.name == self._tag), None) + if self._tag: + tags = datasets_provenance.get_all_tags(dataset=dataset) + tag = next((t for t in tags if t.name == self._tag), None) - 
if tag is None: - raise errors.ParameterError(f"Cannot find tag '{self._tag}' for dataset '{self._name}'") + if tag is None: + raise errors.ParameterError(f"Cannot find tag '{self._tag}' for dataset '{self._name}'") - dataset = datasets_provenance.get_by_id(tag.dataset_id.value) - else: - tag = None + dataset = datasets_provenance.get_by_id(tag.dataset_id.value) + else: + tag = None - assert dataset is not None - provider_dataset = ProviderDataset.from_dataset(dataset) + assert dataset is not None + provider_dataset = ProviderDataset.from_dataset(dataset) - # NOTE: Set the dataset version to the given tag (to reset the version if no tag was provided) - provider_dataset.version = self._tag - # NOTE: Store the tag so that it can be checked later to see if a tag was specified for import - provider_dataset.tag = tag - finally: - client_dispatcher.pop_client() + # NOTE: Set the dataset version to the given tag (to reset the version if no tag was provided) + provider_dataset.version = self._tag + # NOTE: Store the tag so that it can be checked later to see if a tag was specified for import + provider_dataset.tag = tag provider_dataset.derived_from = None provider_dataset.same_as = Url(url_id=remove_credentials(self.latest_uri)) @@ -529,8 +517,8 @@ def _fetch_dataset(self, client_dispatcher: IClientDispatcher): @staticmethod def _migrate_project(): - from renku.core.management.migrate import is_project_unsupported, migrate # Slow import - from renku.core.migration.utils import MigrationType + from renku.core.migration.migrate import is_project_unsupported, migrate_project # Slow import + from renku.core.migration.models.migration import MigrationType if is_project_unsupported(): return @@ -538,7 +526,7 @@ def _migrate_project(): try: communication.disable() # NOTE: We are not interested in migrating workflows when importing datasets - migrate( + migrate_project( skip_template_update=True, skip_docker_update=True, migration_type=~MigrationType.WORKFLOWS, strict=True ) finally: diff --git a/renku/core/dataset/providers/repository.py b/renku/core/dataset/providers/repository.py index 61b40a3cba..c584c8a55b 100644 --- a/renku/core/dataset/providers/repository.py +++ b/renku/core/dataset/providers/repository.py @@ -26,14 +26,13 @@ if TYPE_CHECKING: from renku.core.dataset.providers.models import DatasetAddMetadata - from renku.core.management.client import LocalClient from renku.domain_model.dataset import Dataset class RepositoryImporter(ImporterApi, abc.ABC): """Online repository importer.""" - def download_files(self, client: "LocalClient", destination: Path, extract: bool) -> List["DatasetAddMetadata"]: + def download_files(self, destination: Path, extract: bool) -> List["DatasetAddMetadata"]: """Download dataset files from the remote provider.""" from renku.core.dataset.providers.web import download_files diff --git a/renku/core/dataset/providers/s3.py b/renku/core/dataset/providers/s3.py index 5e3b3c4316..aaa4c3b3de 100644 --- a/renku/core/dataset/providers/s3.py +++ b/renku/core/dataset/providers/s3.py @@ -32,7 +32,6 @@ from renku.domain_model.project_context import project_context if TYPE_CHECKING: - from renku.core.management.client import LocalClient from renku.domain_model.dataset import Dataset @@ -79,7 +78,7 @@ def get_add_parameters() -> List["ProviderParameter"]: ] @staticmethod - def add(client: "LocalClient", uri: str, destination: Path, **kwargs) -> List["DatasetAddMetadata"]: + def add(uri: str, destination: Path, **kwargs) -> List["DatasetAddMetadata"]: """Add files from a URI to a 
dataset.""" dataset = kwargs.get("dataset") if dataset and dataset.storage and not dataset.storage.lower().startswith("s3://"): diff --git a/renku/core/dataset/providers/web.py b/renku/core/dataset/providers/web.py index b7d2c3c25f..d26c2ce3dc 100644 --- a/renku/core/dataset/providers/web.py +++ b/renku/core/dataset/providers/web.py @@ -26,16 +26,15 @@ from renku.core import errors from renku.core.constant import CACHE -from renku.core.dataset.context import wait_for from renku.core.dataset.providers.api import ProviderApi, ProviderPriority from renku.core.util import communication +from renku.core.util.contexts import wait_for from renku.core.util.dataset import check_url from renku.core.util.urls import remove_credentials from renku.domain_model.project_context import project_context if TYPE_CHECKING: from renku.core.dataset.providers.models import DatasetAddMetadata - from renku.core.management.client import LocalClient class WebProvider(ProviderApi): @@ -57,7 +56,6 @@ def supports_add() -> bool: @staticmethod def add( - client: "LocalClient", uri: str, destination: Path, *, diff --git a/renku/core/dataset/providers/zenodo.py b/renku/core/dataset/providers/zenodo.py index d00a725254..3ec75445ee 100644 --- a/renku/core/dataset/providers/zenodo.py +++ b/renku/core/dataset/providers/zenodo.py @@ -367,7 +367,7 @@ def dataset_to_request(self): jsonld["upload_type"] = "dataset" return jsonld - def export(self, client=None, **kwargs): + def export(self, **kwargs): """Execute entire export process.""" # Step 1. Create new deposition deposition = ZenodoDeposition(exporter=self) diff --git a/renku/core/git.py b/renku/core/git.py index 5365287a0b..b3c80293e4 100644 --- a/renku/core/git.py +++ b/renku/core/git.py @@ -15,106 +15,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Wrap Git functionality.""" +"""Git repository management.""" import os import sys import tempfile -import time import uuid from contextlib import contextmanager from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Tuple -from renku.command.command_builder import inject from renku.core import errors -from renku.core.interface.database_gateway import IDatabaseGateway -from renku.core.interface.project_gateway import IProjectGateway from renku.core.storage import checkout_paths_from_storage from renku.core.util.contexts import Isolation from renku.core.util.git import get_dirty_paths from renku.core.util.os import get_absolute_path -from renku.core.util.urls import remove_credentials -from renku.domain_model.project import Project from renku.domain_model.project_context import project_context from renku.infrastructure.repository import Repository -COMMIT_DIFF_STRATEGY = "DIFF" -STARTED_AT = int(time.time() * 1e3) - @contextmanager -@inject.autoparams("project_gateway", "database_gateway") -def with_project_metadata( - project_gateway: IProjectGateway, - database_gateway: IDatabaseGateway, - read_only: bool = False, - name: Optional[str] = None, - namespace: Optional[str] = None, - description: Optional[str] = None, - keywords: Optional[List[str]] = None, - custom_metadata: Optional[Dict] = None, -): - """Yield an editable metadata object. - - Args: - project_gateway(IProjectGateway): Injected project gateway. - database_gateway(IDatabaseGateway): Injected database gateway. - read_only(bool): Whether to save changes or not (Default value = False). 
- name(Optional[str]): Name of the project (when creating a new one) (Default value = None). - namespace(Optional[str]): Namespace of the project (when creating a new one) (Default value = None). - description(Optional[str]): Project description (when creating a new one) (Default value = None). - keywords(Optional[List[str]]): Keywords for the project (when creating a new one) (Default value = None). - custom_metadata(Optional[Dict]): Custom JSON-LD metadata (when creating a new project) - (Default value = None). - """ - try: - project = project_gateway.get_project() - except ValueError: - project = Project.from_client( - name=name, namespace=namespace, description=description, keywords=keywords, custom_metadata=custom_metadata - ) - - yield project - - if not read_only: - project_gateway.update_project(project) - database_gateway.commit() - - -@contextmanager -def commit( - commit_only=None, - commit_empty=True, - raise_if_empty=False, - commit_message=None, - abbreviate_message=True, - skip_dirty_checks=False, -): - """Automatic commit.""" - diff_before = prepare_commit(commit_only=commit_only, skip_dirty_checks=skip_dirty_checks) - - yield - - finalize_commit( - diff_before=diff_before, - commit_only=commit_only, - commit_empty=commit_empty, - raise_if_empty=raise_if_empty, - commit_message=commit_message, - abbreviate_message=abbreviate_message, - ) - - -@contextmanager -def worktree(path=None, branch_name=None, commit=None, merge_args=("--ff-only",)): +def with_worktree(path=None, branch_name=None, commit=None, merge_args=("--ff-only",)): """Create new worktree.""" from renku.infrastructure.repository import NULL_TREE delete = branch_name is None new_branch = commit is not NULL_TREE - new_client, isolation, path, branch_name = prepare_worktree(path=path, branch_name=branch_name, commit=commit) + _, isolation, path, branch_name = prepare_worktree(path=path, branch_name=branch_name, commit=commit) try: yield except (Exception, BaseException) as e: @@ -139,122 +67,6 @@ def worktree(path=None, branch_name=None, commit=None, merge_args=("--ff-only",) ) -def prepare_commit(commit_only=None, skip_dirty_checks=False, skip_staging: bool = False): - """Gather information about repo needed for committing later on.""" - repository = project_context.repository - - def ensure_not_untracked(path): - """Ensure that path is not part of git untracked files.""" - for file_path in repository.untracked_files: - is_parent = (project_context.path / file_path).parent == (project_context.path / path) - is_equal = str(path) == file_path - - if is_parent or is_equal: - raise errors.DirtyRenkuDirectory(repository) - - def ensure_not_staged(path): - """Ensure that path is not part of git staged files.""" - path = str(path) - for file_path in repository.staged_changes: - is_parent = str(file_path.a_path).startswith(path) - is_equal = path == file_path.a_path - - if is_parent or is_equal: - raise errors.DirtyRenkuDirectory(repository) - - if skip_staging: - if not isinstance(commit_only, list) or len(commit_only) == 0: - raise errors.OperationError("Cannot use ``skip_staging`` without specifying files to commit.") - - diff_before = set() - - if commit_only == COMMIT_DIFF_STRATEGY: - if len(repository.staged_changes) > 0 or len(repository.unstaged_changes) > 0: - repository.reset() - - # Exclude files created by pipes. 
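[review note] With `with_project_metadata` and the auto-commit machinery removed, renku/core/git.py is reduced to worktree isolation (`with_project_metadata` reappears via renku.core.util.contexts in the m_0003__2_initial hunk below). A hypothetical call site for the renamed context manager; the branch name and block body are illustrative, only the signature comes from the hunk above:

from renku.core.git import with_worktree

with with_worktree(branch_name="renku/isolated-run", merge_args=("--ff-only",)):
    ...  # operate inside the temporary worktree
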
- diff_before = { - file for file in repository.untracked_files if STARTED_AT - int(Path(file).stat().st_ctime * 1e3) >= 1e3 - } - - if isinstance(commit_only, list) and not skip_dirty_checks: - for path in commit_only: - ensure_not_untracked(path) - ensure_not_staged(path) - - return diff_before - - -def finalize_commit( - diff_before, - commit_only=None, - commit_empty=True, - raise_if_empty=False, - commit_message=None, - abbreviate_message=True, - skip_staging: bool = False, -): - """Commit modified/added paths.""" - from renku.core.util.git import shorten_message - from renku.infrastructure.repository import Actor - from renku.version import __version__, version_url - - committer = Actor(name=f"renku {__version__}", email=version_url) - repository = project_context.repository - - change_types = {item.a_path: item.change_type for item in repository.unstaged_changes} - - if commit_only == COMMIT_DIFF_STRATEGY: - # Get diff generated in command. - staged_after = set(change_types.keys()) - - modified_after_change_types = {item.a_path: item.change_type for item in repository.staged_changes} - - modified_after = set(modified_after_change_types.keys()) - - change_types.update(modified_after_change_types) - - diff_after = set(repository.untracked_files).union(staged_after).union(modified_after) - - # Remove files not touched in command. - commit_only = list(diff_after - diff_before) - - if isinstance(commit_only, list): - for path_ in commit_only: - p = project_context.path / path_ - if p.exists() or change_types.get(str(path_)) == "D": - repository.add(path_) - - if not commit_only: - repository.add(all=True) - - try: - diffs = [d.a_path for d in repository.staged_changes] - except errors.GitError: - diffs = [] - - if not commit_empty and not diffs: - if raise_if_empty: - raise errors.NothingToCommit() - return - - if commit_message and not isinstance(commit_message, str): - raise errors.CommitMessageEmpty() - - elif not commit_message: - argv = [os.path.basename(sys.argv[0])] + [remove_credentials(arg) for arg in sys.argv[1:]] - - commit_message = " ".join(argv) - - if abbreviate_message: - commit_message = shorten_message(commit_message) - - # NOTE: Only commit specified paths when skipping staging area - paths = commit_only if skip_staging else [] - # Ignore pre-commit hooks since we have already done everything. - repository.commit(commit_message + project_context.transaction_id, committer=committer, no_verify=True, paths=paths) - - def prepare_worktree(path=None, branch_name=None, commit=None) -> Tuple[Repository, Isolation, Path, str]: """Set up a Git worktree to provide isolation.""" from renku.infrastructure.repository import NULL_TREE diff --git a/renku/core/incubation/__init__.py b/renku/core/incubation/__init__.py deleted file mode 100644 index d6b64b305e..0000000000 --- a/renku/core/incubation/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2018-2022 - Swiss Data Science Center (SDSC) -# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and -# Eidgenössische Technische Hochschule Zürich (ETHZ). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Namespace for incubating new features.""" diff --git a/renku/core/interface/client_dispatcher.py b/renku/core/interface/client_dispatcher.py deleted file mode 100644 index decc6fe2c1..0000000000 --- a/renku/core/interface/client_dispatcher.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2017-2022 - Swiss Data Science Center (SDSC) -# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and -# Eidgenössische Technische Hochschule Zürich (ETHZ). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Renku client dispatcher interface.""" - -from abc import ABC -from pathlib import Path -from typing import TYPE_CHECKING, Union - -if TYPE_CHECKING: - from renku.core.management.client import LocalClient - - -class IClientDispatcher(ABC): - """Interface for the ClientDispatcher. - - Handles getting current client (LocalClient) and entering/exiting the stack for the client. - """ - - @property - def current_client(self) -> "LocalClient": - """Get the currently active client.""" - raise NotImplementedError - - def push_client_to_stack(self, path: Union[Path, str]) -> "LocalClient": - """Create and push a new client to the stack.""" - raise NotImplementedError - - def push_created_client_to_stack(self, client: "LocalClient") -> None: - """Push an already created client to the stack.""" - raise NotImplementedError - - def pop_client(self) -> None: - """Remove the current client from the stack.""" - raise NotImplementedError diff --git a/renku/core/management/__init__.py b/renku/core/management/__init__.py deleted file mode 100644 index 08fb1a7a08..0000000000 --- a/renku/core/management/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2017-2022 - Swiss Data Science Center (SDSC) -# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and -# Eidgenössische Technische Hochschule Zürich (ETHZ). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
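[review note] With IClientDispatcher deleted, `inject.autoparams` is left serving gateway interfaces only. A minimal sketch of the surviving injection pattern; the function itself is illustrative, while the decorator, interface, and `get_all_active_datasets` all appear in the hunks above:

from renku.command.command_builder import inject
from renku.core.interface.dataset_gateway import IDatasetGateway


@inject.autoparams("dataset_gateway")
def list_dataset_names(dataset_gateway: IDatasetGateway, **_):
    """Only gateways are injected now, never a client."""
    return [dataset.name for dataset in dataset_gateway.get_all_active_datasets()]
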
-"""Renku repository management.""" diff --git a/renku/core/management/client.py b/renku/core/management/client.py deleted file mode 100644 index f48f7fbfc6..0000000000 --- a/renku/core/management/client.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2017-2022 - Swiss Data Science Center (SDSC) -# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and -# Eidgenössische Technische Hochschule Zürich (ETHZ). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Renku repository manager.""" - -import attr - - -@attr.s -class LocalClient: - """A low-level client for communicating with a local Renku repository.""" diff --git a/renku/core/migration/m_0003__0_pyld2.py b/renku/core/migration/m_0003__0_pyld2.py index d0d6f42500..295cfdb61e 100644 --- a/renku/core/migration/m_0003__0_pyld2.py +++ b/renku/core/migration/m_0003__0_pyld2.py @@ -25,6 +25,6 @@ from .m_0005__1_pyld2 import migrate_datasets_for_pyld2 -def migrate(migration_context): +def migrate(_): """Migration function.""" - migrate_datasets_for_pyld2(migration_context.client) + migrate_datasets_for_pyld2() diff --git a/renku/core/migration/m_0003__1_jsonld.py b/renku/core/migration/m_0003__1_jsonld.py index 5cf9f4b7f6..9411eb544f 100644 --- a/renku/core/migration/m_0003__1_jsonld.py +++ b/renku/core/migration/m_0003__1_jsonld.py @@ -16,11 +16,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""JSON-LD dataset migrations.""" + import itertools import json import os import uuid from pathlib import Path +from typing import Dict import pyld @@ -34,14 +36,14 @@ from renku.domain_model.project_context import project_context -def migrate(migration_context): +def migrate(_): """Migration function.""" - _migrate_project_metadata(migration_context.client) - _migrate_datasets_metadata(migration_context.client) + _migrate_project_metadata() + _migrate_datasets_metadata() -def _migrate_project_metadata(client): +def _migrate_project_metadata(): """Apply all initial JSON-LD migrations to project.""" jsonld_translate = { "http://schema.org/name": "http://xmlns.com/foaf/0.1/name", @@ -58,7 +60,7 @@ def _migrate_project_metadata(client): ) -def _migrate_datasets_metadata(client): +def _migrate_datasets_metadata(): """Apply all initial JSON-LD migrations to datasets.""" jsonld_migrations = { "dctypes:Dataset": [_migrate_dataset_schema, _migrate_absolute_paths], @@ -71,22 +73,21 @@ def _migrate_datasets_metadata(client): } old_metadata_paths = get_pre_0_3_4_datasets_metadata() - new_metadata_paths = get_datasets_path(client).rglob(OLD_METADATA_PATH) + new_metadata_paths = get_datasets_path().rglob(OLD_METADATA_PATH) for path in itertools.chain(old_metadata_paths, new_metadata_paths): _apply_on_the_fly_jsonld_migrations( path=path, jsonld_context=_INITIAL_JSONLD_DATASET_CONTEXT, fields=_DATASET_FIELDS, - client=client, jsonld_migrations=jsonld_migrations, ) def _apply_on_the_fly_jsonld_migrations( - path, jsonld_context, fields, client=None, jsonld_migrations=None, jsonld_translate=None, persist_changes=True + path, jsonld_context, fields, jsonld_migrations=None, jsonld_translate=None, persist_changes=True ): - data = read_yaml(path) + data: Dict = read_yaml(path) if not isinstance(data, dict) and not isinstance(data, list): # NOTE: metadata file is probably not an actual renku file @@ -116,7 +117,7 @@ def _apply_on_the_fly_jsonld_migrations( migrations += jsonld_migrations.get(schema_type, []) for migration in set(migrations): - data = migration(data, client) + data = migration(data) if data["@context"] != jsonld_context: # merge new context into old context to prevent properties @@ -151,7 +152,7 @@ def _apply_on_the_fly_jsonld_migrations( write_yaml(path, data) -def _migrate_dataset_schema(data, client): +def _migrate_dataset_schema(data): """Migrate from old dataset formats.""" if "authors" not in data: return @@ -170,7 +171,7 @@ def _migrate_dataset_schema(data, client): return data -def _migrate_absolute_paths(data, client): +def _migrate_absolute_paths(data): """Migrate dataset paths to use relative path.""" raw_path = data.get("path", ".") path = Path(raw_path) @@ -196,7 +197,7 @@ def _migrate_absolute_paths(data, client): return data -def _migrate_doi_identifier(data, client): +def _migrate_doi_identifier(data): """If the dataset _id is doi, make it a UUID.""" from renku.core.util.doi import is_doi from renku.core.util.uuid import is_uuid @@ -226,7 +227,7 @@ def _migrate_doi_identifier(data, client): return data -def _migrate_same_as_structure(data, client): +def _migrate_same_as_structure(data): """Changes sameAs string to schema:URL object.""" same_as = data.get("same_as") @@ -246,11 +247,10 @@ def _migrate_same_as_structure(data, client): return data -def _migrate_dataset_file_id(data, client): +def _migrate_dataset_file_id(data): """Ensure dataset files have a fully qualified url as id.""" host = "localhost" - if client: - host = project_context.remote.host or host + host = 
project_context.remote.host or host host = os.environ.get("RENKU_DOMAIN") or host files = data.get("files", []) diff --git a/renku/core/migration/m_0003__2_initial.py b/renku/core/migration/m_0003__2_initial.py index f043dd3c5e..ac2d2dc29c 100644 --- a/renku/core/migration/m_0003__2_initial.py +++ b/renku/core/migration/m_0003__2_initial.py @@ -24,9 +24,8 @@ from renku.core.constant import DEFAULT_DATA_DIR as DATA_DIR from renku.core.constant import RENKU_HOME -from renku.core.git import with_project_metadata from renku.core.migration.models.refs import LinkReference -from renku.core.migration.models.v3 import Collection, Dataset, Project, get_client_datasets +from renku.core.migration.models.v3 import Collection, Dataset, Project, get_project_datasets from renku.core.migration.models.v9 import generate_file_id, generate_label from renku.core.migration.utils import ( OLD_METADATA_PATH, @@ -35,6 +34,7 @@ get_pre_0_3_4_datasets_metadata, is_using_temporary_datasets_path, ) +from renku.core.util.contexts import with_project_metadata from renku.core.util.git import get_in_submodules from renku.core.util.urls import url_to_string from renku.domain_model.dataset import generate_default_name @@ -43,14 +43,13 @@ def migrate(migration_context): """Migration function.""" - client = migration_context.client _ensure_clean_lock() _do_not_track_lock_file() - _migrate_datasets_pre_v0_3(client) - _migrate_broken_dataset_paths(client) - _fix_labels_and_ids(client) - _fix_dataset_urls(client) - _migrate_dataset_and_files_project(client) + _migrate_datasets_pre_v0_3() + _migrate_broken_dataset_paths(migration_context=migration_context) + _fix_labels_and_ids(migration_context) + _fix_dataset_urls() + _migrate_dataset_and_files_project() def _ensure_clean_lock(): @@ -66,18 +65,17 @@ def _ensure_clean_lock(): def _do_not_track_lock_file(): - """Add lock file to .gitingore if not already exists.""" - # Add lock file to .gitignore. 
+ """Add lock file to .gitignore if not already exists.""" if is_using_temporary_datasets_path(): return lock_file = ".renku.lock" gitignore = project_context.path / ".gitignore" if not gitignore.exists() or lock_file not in gitignore.read_text(): - gitignore.open("a").write("\n{0}\n".format(lock_file)) + gitignore.open("a").write(f"\n{lock_file}\n") -def _migrate_datasets_pre_v0_3(client): +def _migrate_datasets_pre_v0_3(): """Migrate datasets from Renku 0.3.x.""" if is_using_temporary_datasets_path(): return @@ -89,10 +87,10 @@ def _migrate_datasets_pre_v0_3(client): changed = True name = str(old_path.parent.relative_to(project_context.path / DATA_DIR)) - dataset = Dataset.from_yaml(old_path, client) + dataset = Dataset.from_yaml(old_path) dataset.title = name dataset.name = generate_default_name(name) - new_path = get_datasets_path(client) / dataset.identifier / OLD_METADATA_PATH + new_path = get_datasets_path() / dataset.identifier / OLD_METADATA_PATH new_path.parent.mkdir(parents=True, exist_ok=True) with with_project_metadata(read_only=True) as meta: @@ -122,15 +120,15 @@ def _migrate_datasets_pre_v0_3(client): repository.commit("renku migrate: committing structural changes" + project_context.transaction_id) -def _migrate_broken_dataset_paths(client): +def _migrate_broken_dataset_paths(migration_context): """Ensure all paths are using correct directory structure.""" - for dataset in get_client_datasets(client): + for dataset in get_project_datasets(): if not dataset.name: dataset.name = generate_default_name(dataset.title, dataset.version) else: dataset.name = generate_default_name(dataset.name) - expected_path = get_datasets_path(client) / dataset.identifier + expected_path = get_datasets_path() / dataset.identifier # migrate the refs if not is_using_temporary_datasets_path(): @@ -166,46 +164,45 @@ def _migrate_broken_dataset_paths(client): dataset.files = files - for file_ in dataset.files: - if _is_dir(client=client, path=file_.path): + for file in dataset.files: + if _is_dir(migration_context=migration_context, path=file.path): continue - if file_.path.startswith(".."): - file_.path = Path( - os.path.abspath(get_datasets_path(client) / dataset.identifier / file_.path) - ).relative_to(base_path) - elif not _exists(client=client, path=file_.path): - file_.path = (project_context.path / DATA_DIR / file_.path).relative_to(project_context.path) + if file.path.startswith(".."): + file_absolute_path = os.path.abspath(get_datasets_path() / dataset.identifier / file.path) + file.path = Path(file_absolute_path).relative_to(base_path) + elif not _exists(migration_context=migration_context, path=file.path): + file.path = (project_context.path / DATA_DIR / file.path).relative_to(project_context.path) - file_.name = os.path.basename(file_.path) + file.name = os.path.basename(file.path) dataset.to_yaml(expected_path / "metadata.yml") -def _fix_labels_and_ids(client): +def _fix_labels_and_ids(migration_context): """Ensure files have correct label instantiation.""" - for dataset in get_client_datasets(client): - dataset._id = generate_dataset_id(client, identifier=dataset.identifier) + for dataset in get_project_datasets(): + dataset._id = generate_dataset_id(identifier=dataset.identifier) dataset._label = dataset.identifier - for file_ in dataset.files: - if not _exists(client=client, path=file_.path): + for file in dataset.files: + if not _exists(migration_context=migration_context, path=file.path): continue - _, _, commit, _ = get_in_submodules( - project_context.repository, 
_get_previous_commit(client, file_.path, revision="HEAD"), file_.path - ) - if not _is_file_id_valid(file_._id, file_.path, commit.hexsha): - file_._id = generate_file_id(client, hexsha=commit.hexsha, path=file_.path) + commit = _get_previous_commit(migration_context=migration_context, path=file.path) + _, commit, _ = get_in_submodules(repository=project_context.repository, commit=commit, path=file.path) - if not file_._label or commit.hexsha not in file_._label or file_.path not in file_._label: - file_._label = generate_label(file_.path, commit.hexsha) + if not _is_file_id_valid(file._id, file.path, commit.hexsha): + file._id = generate_file_id(hexsha=commit.hexsha, path=file.path) + + if not file._label or commit.hexsha not in file._label or file.path not in file._label: + file._label = generate_label(file.path, commit.hexsha) dataset.to_yaml() -def _fix_dataset_urls(client): +def _fix_dataset_urls(): """Ensure dataset and its files have correct url format.""" - for dataset in get_client_datasets(client): + for dataset in get_project_datasets(): dataset.url = dataset._id for file_ in dataset.files: if file_.url: @@ -214,14 +211,14 @@ def _fix_dataset_urls(client): dataset.to_yaml() -def _migrate_dataset_and_files_project(client): +def _migrate_dataset_and_files_project(): """Ensure dataset files have correct project.""" project_path = project_context.metadata_path.joinpath(OLD_METADATA_PATH) project = Project.from_yaml(project_path) if not is_using_temporary_datasets_path(): project.to_yaml(project_path) - for dataset in get_client_datasets(client): + for dataset in get_project_datasets(): dataset._project = project if not dataset.creators: dataset.creators = [project.creator] @@ -239,24 +236,25 @@ def _is_file_id_valid(id_, path, hexsha): return u.scheme and u.netloc and u.path.startswith("/blob") and hexsha in u.path and path in u.path -def _exists(client, path): - dmc = getattr(client, "dataset_migration_context", None) +def _exists(migration_context, path): + dmc = migration_context.dataset_migration_context if dmc: return dmc.exists(path) - path = Path(path) + path = project_context.path / path return path.exists() or (path.is_symlink() and os.path.lexists(path)) -def _is_dir(client, path): - dmc = getattr(client, "dataset_migration_context", None) +def _is_dir(migration_context, path): + dmc = migration_context.dataset_migration_context if dmc: return dmc.is_dir(path) - return Path(path).is_dir() + + return (project_context.path / path).is_dir() -def _get_previous_commit(client, path, revision): - dmc = getattr(client, "dataset_migration_context", None) +def _get_previous_commit(migration_context, path): + dmc = migration_context.dataset_migration_context if dmc: return dmc.get_previous_commit(path) - return project_context.repository.get_previous_commit(path, revision=revision) + return project_context.repository.get_previous_commit(path, revision="HEAD") diff --git a/renku/core/migration/m_0004__0_pyld2.py b/renku/core/migration/m_0004__0_pyld2.py index d0d6f42500..295cfdb61e 100644 --- a/renku/core/migration/m_0004__0_pyld2.py +++ b/renku/core/migration/m_0004__0_pyld2.py @@ -25,6 +25,6 @@ from .m_0005__1_pyld2 import migrate_datasets_for_pyld2 -def migrate(migration_context): +def migrate(_): """Migration function.""" - migrate_datasets_for_pyld2(migration_context.client) + migrate_datasets_for_pyld2() diff --git a/renku/core/migration/m_0004__submodules.py b/renku/core/migration/m_0004__submodules.py index 698ec475cd..0814cb0685 100644 --- 
a/renku/core/migration/m_0004__submodules.py +++ b/renku/core/migration/m_0004__submodules.py @@ -16,17 +16,15 @@ # See the License for the specific language governing permissions and # limitations under the License. """Migrate datasets based on Git submodules.""" + import glob import os import shutil from pathlib import Path -from renku.command.command_builder.command import inject from renku.core import errors -from renku.core.interface.client_dispatcher import IClientDispatcher -from renku.core.management.client import LocalClient -from renku.core.migration.m_0009__new_metadata_storage import _fetch_datasets -from renku.core.migration.models.v3 import DatasetFileSchemaV3, get_client_datasets +from renku.core.migration.m_0009__new_metadata_storage import fetch_datasets +from renku.core.migration.models.v3 import DatasetFileSchemaV3, get_project_datasets from renku.core.migration.models.v9 import DatasetFile, OldDatasetFileSchema, generate_file_id, generate_label from renku.core.util.urls import remove_credentials from renku.domain_model.project_context import project_context @@ -35,12 +33,11 @@ def migrate(migration_context): """Migration function.""" - _migrate_submodule_based_datasets(migration_context.client) + _migrate_submodule_based_datasets(migration_context) -@inject.autoparams() -def _migrate_submodule_based_datasets(client, client_dispatcher: IClientDispatcher): - from renku.core.management.migrate import is_project_unsupported, migrate +def _migrate_submodule_based_datasets(migration_context): + from renku.core.migration.migrate import is_project_unsupported, migrate_project submodules = project_context.repository.submodules if len(submodules) == 0: @@ -51,7 +48,7 @@ def _migrate_submodule_based_datasets(client, client_dispatcher: IClientDispatch submodules.update() - for dataset in get_client_datasets(client): + for dataset in get_project_datasets(): for file_ in dataset.files: path = project_context.path / file_.path if not path.is_symlink(): @@ -73,23 +70,14 @@ def _migrate_submodule_based_datasets(client, client_dispatcher: IClientDispatch submodules_urls = {s.relative_path: s.url for s in submodules} - remote_clients = dict() for repo_path in repo_paths: with project_context.with_path(repo_path, save_changes=True): - remote_client = LocalClient() - remote_clients[repo_path] = remote_client - client_dispatcher.push_created_client_to_stack(remote_client) - - try: - if not is_project_unsupported(): - migrate(skip_template_update=True, skip_docker_update=True) - finally: - client_dispatcher.pop_client() + if not is_project_unsupported(): + migrate_project(skip_template_update=True, skip_docker_update=True) metadata = {} for path, target, repo_path in symlinks: - remote_client = remote_clients[repo_path] path_within_repo = target.relative_to(repo_path) repo_is_remote = ".renku/vendors/local" not in str(repo_path) @@ -100,14 +88,14 @@ def _migrate_submodule_based_datasets(client, client_dispatcher: IClientDispatch if repo_is_remote: with project_context.with_path(repo_path): - based_on = _fetch_file_metadata(remote_client, path_within_repo) + based_on = _fetch_file_metadata(migration_context=migration_context, path=path_within_repo) if based_on: based_on.url = url based_on.based_on = None else: - based_on = DatasetFile.from_revision(remote_client, path=path_within_repo, url=url) - data = OldDatasetFileSchema(client=remote_client).dump(based_on) - based_on = DatasetFileSchemaV3(client=remote_client).load(data) + based_on = DatasetFile.from_revision(path=path_within_repo, 
url=url) + data = OldDatasetFileSchema().dump(based_on) + based_on = DatasetFileSchemaV3().load(data) else: if url: full_path = Path(url) / path_within_repo @@ -133,23 +121,29 @@ def _migrate_submodule_based_datasets(client, client_dispatcher: IClientDispatch except errors.GitError: pass - for dataset in get_client_datasets(client): + for dataset in get_project_datasets(): for file_ in dataset.files: if file_.path in metadata: based_on, url = metadata[file_.path] file_.based_on = based_on file_.url = remove_credentials(url) file_.commit = repository.get_previous_commit(file_.path) - file_._id = generate_file_id(client, hexsha=file_.commit.hexsha, path=file_.path) + file_._id = generate_file_id(hexsha=file_.commit.hexsha, path=file_.path) file_._label = generate_label(file_.path, file_.commit.hexsha) dataset.to_yaml() -def _fetch_file_metadata(client, path): +def _fetch_file_metadata(migration_context, path): """Return metadata for a single file.""" - paths = glob.glob(f"{project_context.path}/.renku/datasets/*/*.yml" "") - for dataset in _fetch_datasets(client, project_context.repository.head.commit.hexsha, paths, [])[0]: + datasets, _ = fetch_datasets( + migration_context=migration_context, + revision=project_context.repository.head.commit.hexsha, + paths=glob.glob(f"{project_context.path}/.renku/datasets/*/*.yml" ""), + deleted_paths=[], + ) + + for dataset in datasets: for file in dataset.files: if file.entity.path == path: return file diff --git a/renku/core/migration/m_0005__1_pyld2.py b/renku/core/migration/m_0005__1_pyld2.py index 28905400ca..36b623fe7f 100644 --- a/renku/core/migration/m_0005__1_pyld2.py +++ b/renku/core/migration/m_0005__1_pyld2.py @@ -22,14 +22,14 @@ from renku.core.migration.utils import OLD_METADATA_PATH, get_datasets_path -def migrate(migration_context): +def migrate(_): """Migration function.""" - migrate_datasets_for_pyld2(migration_context.client) + migrate_datasets_for_pyld2() -def migrate_datasets_for_pyld2(client): +def migrate_datasets_for_pyld2(): """Migrate type scoped contexts of datasets.""" - paths = get_datasets_path(client).rglob(OLD_METADATA_PATH) + paths = get_datasets_path().rglob(OLD_METADATA_PATH) for path in paths: with path.open("r") as dataset: diff --git a/renku/core/migration/m_0005__2_cwl.py b/renku/core/migration/m_0005__2_cwl.py index 7a6ea32bf4..b42bd183f9 100644 --- a/renku/core/migration/m_0005__2_cwl.py +++ b/renku/core/migration/m_0005__2_cwl.py @@ -21,7 +21,6 @@ import os import traceback import uuid -from collections import defaultdict from functools import cmp_to_key from hashlib import sha1 from pathlib import Path @@ -31,8 +30,8 @@ from cwl_utils.parser.cwl_v1_0 import CommandLineTool, InitialWorkDirRequirement from werkzeug.utils import secure_filename -from renku.core import errors from renku.core.constant import RENKU_HOME +from renku.core.migration.models.migration import MigrationType, RepositoryCache from renku.core.migration.models.v3 import Dataset from renku.core.migration.models.v9 import ( Collection, @@ -47,11 +46,11 @@ SoftwareAgent, WorkflowRun, ) -from renku.core.migration.utils import OLD_DATASETS_PATH, OLD_WORKFLOW_PATH, MigrationType +from renku.core.migration.utils import OLD_DATASETS_PATH, OLD_WORKFLOW_PATH from renku.core.util import communication from renku.core.util.git import get_in_submodules from renku.domain_model.project_context import project_context -from renku.infrastructure.repository import Actor, Commit, git_unicode_unescape +from renku.infrastructure.repository import Actor, Commit from 
renku.version import __version__, version_url default_missing_software_agent = SoftwareAgent( @@ -63,10 +62,15 @@ def migrate(migration_context): """Migration function.""" if MigrationType.WORKFLOWS not in migration_context.options.type: return - _migrate_old_workflows(client=migration_context.client, strict=migration_context.options.strict) + migration_context.cwl_cache = {} + try: + _migrate_old_workflows(migration_context=migration_context, strict=migration_context.options.strict) + finally: + migration_context.cwl_cache = None -def _migrate_old_workflows(client, strict): + +def _migrate_old_workflows(migration_context, strict): """Migrates old cwl workflows to new jsonld format.""" def sort_cwl_commits(e1, e2): @@ -76,61 +80,68 @@ def sort_cwl_commits(e1, e2): return commit1.compare_to(commit2) - cache = RepositoryCache.from_client(client) - client.cache = cache + cache = RepositoryCache.from_repository(project_context.repository) + migration_context.cache = cache - wf_path = f"{project_context.metadata_path}/workflow/*.cwl" - for path in glob.glob(wf_path): - if path not in cache.cwl_files_commits: - raise ValueError(f"Couldn't find a previous commit for path `{path}`") + try: + wf_path = f"{project_context.metadata_path}/workflow/*.cwl" + for path in glob.glob(wf_path): + if path not in cache.cwl_files_commits: + raise ValueError(f"Couldn't find a previous commit for path `{path}`") - cwl_paths = list(cache.cwl_files_commits.items()) - cwl_paths = sorted(cwl_paths, key=cmp_to_key(sort_cwl_commits)) + cwl_paths = list(cache.cwl_files_commits.items()) + cwl_paths = sorted(cwl_paths, key=cmp_to_key(sort_cwl_commits)) - for n, element in enumerate(cwl_paths, start=1): - communication.echo(f"Processing commit {n}/{len(cwl_paths)}", end="\r") + for n, element in enumerate(cwl_paths, start=1): + communication.echo(f"Processing commit {n}/{len(cwl_paths)}", end="\r") - cwl_file, commit = element - repository = project_context.repository + cwl_file, commit = element + repository = project_context.repository - try: - if not Path(cwl_file).exists(): - continue + try: + if not Path(cwl_file).exists(): + continue - path = _migrate_cwl(client, cwl_file, commit) - os.remove(cwl_file) + path = _migrate_cwl(migration_context=migration_context, path=cwl_file, commit=commit) + os.remove(cwl_file) - repository.add(cwl_file, path) + repository.add(cwl_file, path) - if repository.is_dirty(): - commit_msg = "renku migrate: committing migrated workflow" - committer = Actor(name=f"renku {__version__}", email=version_url) - repository.commit(commit_msg + project_context.transaction_id, committer=committer, no_verify=True) - except Exception: - if strict: - raise - communication.echo("") - communication.warn(f"Cannot process commit '{commit.hexsha}' - Exception: {traceback.format_exc()}") + if repository.is_dirty(): + commit_msg = "renku migrate: committing migrated workflow" + committer = Actor(name=f"renku {__version__}", email=version_url) + repository.commit(commit_msg + project_context.transaction_id, committer=committer, no_verify=True) + except Exception: + if strict: + raise + communication.echo("") + communication.warn(f"Cannot process commit '{commit.hexsha}' - Exception: {traceback.format_exc()}") + finally: + migration_context.cache = None -def _migrate_cwl(client, path, commit): +def _migrate_cwl(migration_context, path, commit): """Migrate a cwl file.""" - workflow = parse_cwl_cached(str(path)) + workflow = parse_cwl_cached(migration_context=migration_context, path=str(path)) if 
isinstance(workflow, CommandLineTool): - _, path = _migrate_single_step(client, workflow, path, commit=commit, persist=True) + _, path = _migrate_single_step( + migration_context=migration_context, cmd_line_tool=workflow, path=path, commit=commit, persist=True + ) else: - _, path = _migrate_composite_step(client, workflow, path, commit=commit) + _, path = _migrate_composite_step( + migration_context=migration_context, workflow=workflow, path=path, commit=commit + ) return path -def _migrate_single_step(client, cmd_line_tool, path, commit=None, parent_commit=None, persist=False): +def _migrate_single_step(migration_context, cmd_line_tool, path, commit=None, parent_commit=None, persist=False): """Migrate a single step workflow.""" if not commit: - commit = client.cache.find_previous_commit(path, revision=parent_commit if parent_commit else "HEAD") + commit = migration_context.cache.find_previous_commit(path, revision=parent_commit if parent_commit else "HEAD") - run = Run(client=client, path=path, commit=commit) + run = Run(path=path, commit=commit) run.command = " ".join(cmd_line_tool.baseCommand) run.successcodes = cmd_line_tool.successCodes @@ -145,7 +156,7 @@ def _migrate_single_step(client, cmd_line_tool, path, commit=None, parent_commit label = f"{rel_path}@{commit.hexsha}" identifier = sha1(label.encode("utf-8")).hexdigest() - base_id = Run.generate_id(client, identifier=identifier) + base_id = Run.generate_id(identifier=identifier) run._id = base_id if cmd_line_tool.stdin: @@ -164,8 +175,8 @@ def _migrate_single_step(client, cmd_line_tool, path, commit=None, parent_commit run.inputs.append( CommandInput( id=id_, - consumes=_entity_from_path(client, stdin, commit), - mapped_to=MappedIOStream(client=client, stream_type="stdin"), + consumes=_entity_from_path(migration_context, stdin, commit), + mapped_to=MappedIOStream(stream_type="stdin"), ) ) @@ -173,8 +184,8 @@ def _migrate_single_step(client, cmd_line_tool, path, commit=None, parent_commit run.outputs.append( CommandOutput( id=CommandOutput.generate_id(base_id, "stdout"), - produces=_entity_from_path(client, cmd_line_tool.stdout, commit), - mapped_to=MappedIOStream(client=client, stream_type="stdout"), + produces=_entity_from_path(migration_context, cmd_line_tool.stdout, commit), + mapped_to=MappedIOStream(stream_type="stdout"), create_folder=False, ) ) @@ -188,8 +199,8 @@ def _migrate_single_step(client, cmd_line_tool, path, commit=None, parent_commit run.outputs.append( CommandOutput( id=CommandOutput.generate_id(base_id, "stderr"), - produces=_entity_from_path(client, cmd_line_tool.stderr, commit), - mapped_to=MappedIOStream(client=client, stream_type="stderr"), + produces=_entity_from_path(migration_context, cmd_line_tool.stderr, commit), + mapped_to=MappedIOStream(stream_type="stderr"), create_folder=False, ) ) @@ -256,7 +267,7 @@ def _migrate_single_step(client, cmd_line_tool, path, commit=None, parent_commit id=CommandOutput.generate_id(base_id, position), position=position, prefix=prefix, - produces=_entity_from_path(client, path, commit), + produces=_entity_from_path(migration_context, path, commit), create_folder=create_folder, ) ) @@ -281,7 +292,7 @@ def _migrate_single_step(client, cmd_line_tool, path, commit=None, parent_commit id=CommandInput.generate_id(base_id, position), position=position, prefix=prefix, - consumes=_entity_from_path(client, path, commit), + consumes=_entity_from_path(migration_context, path, commit), ) ) else: @@ -307,10 +318,10 @@ def _migrate_single_step(client, cmd_line_tool, path, commit=None, 
parent_commit path = absolute_path.relative_to(project_context.path) run.path = path - process_run = ProcessRun.from_run(run, client, path, commit=commit) - process_run.invalidated = _invalidations_from_commit(client, commit) + process_run = ProcessRun.from_run(run, path, commit=commit) + process_run.invalidated = _invalidations_from_commit(commit) - # HACK: This fixes broken SoftwareAgent due to rebases done by users + # HACK: This fixes broken SoftwareAgent due to re-bases done by users if isinstance(process_run.association.agent, Person) or not process_run.association.agent.label.startswith( "renku " ): @@ -319,15 +330,15 @@ def _migrate_single_step(client, cmd_line_tool, path, commit=None, parent_commit return process_run, absolute_path -def _migrate_composite_step(client, workflow, path, commit=None): +def _migrate_composite_step(migration_context, workflow, path, commit=None): """Migrate a composite workflow.""" if not commit: - commit = client.cache.find_previous_commit(path) - run = Run(client=client, path=path, commit=commit) + commit = migration_context.cache.find_previous_commit(path) + run = Run(path=path, commit=commit) rel_path = Path(path).relative_to(project_context.path) label = f"{rel_path}@{commit.hexsha}" identifier = sha1(label.encode("utf-8")).hexdigest() - run._id = Run.generate_id(client, identifier=identifier) + run._id = Run.generate_id(identifier=identifier) name = "{0}_migrated.yaml".format(uuid.uuid4().hex) @@ -340,14 +351,16 @@ def _migrate_composite_step(client, workflow, path, commit=None): else: uri = urlparse(step.run) path = uri.path - subrun = parse_cwl_cached(path) + sub_run = parse_cwl_cached(migration_context=migration_context, path=path) - subprocess, _ = _migrate_single_step(client, subrun, path, parent_commit=commit) + subprocess, _ = _migrate_single_step( + migration_context=migration_context, cmd_line_tool=sub_run, path=path, parent_commit=commit + ) run.add_subprocess(subprocess) - wf = WorkflowRun.from_run(run, client, run.path, commit=commit) + wf = WorkflowRun.from_run(run, run.path, commit=commit) - # HACK: This fixes broken SoftwareAgent due to rebases done by users + # HACK: This fixes broken SoftwareAgent due to re-bases done by users if isinstance(wf.association.agent, Person) or not wf.association.agent.label.startswith("renku "): wf.association.agent = default_missing_software_agent for p in wf._processes: @@ -358,10 +371,10 @@ def _migrate_composite_step(client, workflow, path, commit=None): return wf, run.path -def _entity_from_path(client, path, commit): +def _entity_from_path(migration_context, path, commit): """Gets the entity associated with a path.""" - client, _, commit, path = get_in_submodules( - project_context.repository, client.cache.find_previous_commit(path, revision=commit), path + _, commit, path = get_in_submodules( + project_context.repository, migration_context.cache.find_previous_commit(path, revision=commit), path ) entity_cls = Entity @@ -369,12 +382,12 @@ def _entity_from_path(client, path, commit): entity_cls = Collection if str(path).startswith(os.path.join(RENKU_HOME, OLD_DATASETS_PATH)): - return Dataset.from_yaml(path=project_context.path / path, client=client, commit=commit) + return Dataset.from_yaml(path=project_context.path / path, commit=commit) else: - return entity_cls(commit=commit, client=client, path=str(path)) + return entity_cls(commit=commit, path=str(path)) -def _invalidations_from_commit(client, commit: Commit): +def _invalidations_from_commit(commit: Commit): """Gets invalidated files from a 
commit.""" results = [] collections = dict() @@ -383,16 +396,16 @@ def _invalidations_from_commit(client, commit: Commit): if not file.deleted: continue path = Path(file.a_path) - entity = _get_activity_entity(client, commit, path, collections, deleted=True) + entity = _get_activity_entity(commit, path, collections, deleted=True) results.append(entity) return results -def _get_activity_entity(client, commit, path, collections, deleted=False): +def _get_activity_entity(commit, path, collections, deleted=False): """Gets the entity associated with this Activity and path.""" - client, _, commit, path = get_in_submodules(project_context.repository, commit, path) + _, commit, path = get_in_submodules(repository=project_context.repository, commit=commit, path=path) output_path = project_context.path / path parents = list(output_path.relative_to(project_context.path).parents) @@ -402,7 +415,7 @@ def _get_activity_entity(client, commit, path, collections, deleted=False): if str(parent) in collections: collection = collections[str(parent)] else: - collection = Collection(client=client, commit=commit, path=str(parent), members=[], parent=collection) + collection = Collection(commit=commit, path=str(parent), members=[], parent=collection) members.append(collection) collections[str(parent)] = collection @@ -413,9 +426,9 @@ def _get_activity_entity(client, commit, path, collections, deleted=False): entity_cls = Collection if str(path).startswith(os.path.join(RENKU_HOME, OLD_DATASETS_PATH)) and not deleted: - entity = Dataset.from_yaml(path=project_context.path / path, client=client, commit=commit) + entity = Dataset.from_yaml(path=project_context.path / path, commit=commit) else: - entity = entity_cls(commit=commit, client=client, path=str(path), parent=collection) + entity = entity_cls(commit=commit, path=str(path), parent=collection) if collection: collection.members.append(entity) @@ -423,100 +436,13 @@ def _get_activity_entity(client, commit, path, collections, deleted=False): return entity -_cwl_cache = {} - - -def parse_cwl_cached(path): +def parse_cwl_cached(migration_context, path: str): """Parse cwl and remember the result for future execution.""" - if path in _cwl_cache: - return _cwl_cache[path] + if path in migration_context.cwl_cache: + return migration_context.cwl_cache[path] cwl = load_document_by_uri(path) - _cwl_cache[path] = cwl + migration_context.cwl_cache[path] = cwl return cwl - - -class RepositoryCache: - """Cache for a git repository.""" - - def __init__(self, client, repository, cache, cwl_files_commits): - self.client = client - self.repository = repository - self.cache = cache - self.cwl_files_commits = cwl_files_commits - - @classmethod - def from_client(cls, client): - """Return a cached repository.""" - cache = defaultdict(list) - cwl_files_commits_map = {} - repository = project_context.repository - - for n, commit in enumerate(repository.iterate_commits(full_history=True), start=1): - communication.echo(f"Caching commit {n}", end="\r") - - cwl_files = [] - for file in commit.get_changes(): - # Ignore deleted files - if file.deleted: - continue - - path = file.b_path - cache[path].append(commit) - - if path.startswith(f"{RENKU_HOME}/workflow/") and path.endswith(".cwl"): - cwl_files.append(os.path.realpath(project_context.path / path)) - - cls._update_cwl_files_and_commits(commit, cwl_files_commits_map, cwl_files) - - communication.echo(40 * " ", end="\r") - - return RepositoryCache(client, repository, cache, cwl_files_commits_map) - - @staticmethod - def 
_update_cwl_files_and_commits(commit, cwl_files_commits_map, cwl_files): - if len(cwl_files) != 1: - return - - path = cwl_files[0] - existing_commit = cwl_files_commits_map.get(path) - - if existing_commit is None: - cwl_files_commits_map[path] = commit - elif existing_commit.compare_to(commit) < 0: # existing commit is older - cwl_files_commits_map[path] = commit - - def find_previous_commit(self, path, revision="HEAD"): - """Return a previous commit for a given path starting from 'revision'.""" - - def find_from_client(path, revision): - try: - return self.repository.get_previous_commit(path=path, revision=revision, full_history=True) - except errors.GitCommitNotFoundError: - communication.warn(f"Cannot find previous commit for {path} from {str(revision)}") - return revision - - try: - path = (project_context.path / path).relative_to(project_context.path) - except ValueError: - pass - path = str(path) - - if revision == "HEAD": - revision = self.repository.head.commit - - commits = self.cache.get(git_unicode_unescape(path)) - if not commits: - return find_from_client(path, revision) - - if revision in commits: - return revision - - for commit in commits: - if commit.compare_to(revision) <= 0: - return commit - - # No commit was found - return find_from_client(path, revision) diff --git a/renku/core/migration/m_0006__dataset_context.py b/renku/core/migration/m_0006__dataset_context.py index 23ea7325f0..2710d9eab0 100644 --- a/renku/core/migration/m_0006__dataset_context.py +++ b/renku/core/migration/m_0006__dataset_context.py @@ -17,14 +17,14 @@ # limitations under the License. """DatasetFile metadata migrations.""" -from renku.core.migration.models.v3 import get_client_datasets +from renku.core.migration.models.v3 import get_project_datasets -def migrate(migration_context): +def migrate(_): """Migration function.""" - _fix_dataset_metadata(migration_context.client) + _fix_dataset_metadata() -def _fix_dataset_metadata(client): - for dataset in get_client_datasets(client): +def _fix_dataset_metadata(): + for dataset in get_project_datasets(): dataset.to_yaml() diff --git a/renku/core/migration/m_0007__source_url.py b/renku/core/migration/m_0007__source_url.py index 29fa7a61a3..6669321085 100644 --- a/renku/core/migration/m_0007__source_url.py +++ b/renku/core/migration/m_0007__source_url.py @@ -17,25 +17,25 @@ # limitations under the License. 
"""DatasetFile source and url migrations.""" -from renku.core.migration.models.v7 import get_client_datasets +from renku.core.migration.models.v7 import get_project_datasets from renku.core.migration.utils import generate_dataset_file_url -def migrate(migration_context): +def migrate(_): """Migration function.""" - _fix_dataset_file_source_and_url(migration_context.client) + _fix_dataset_file_source_and_url() -def _fix_dataset_file_source_and_url(client): - for dataset in get_client_datasets(client): - for file_ in dataset.files: - file_.source = file_.url - file_.url = generate_dataset_file_url(client, filepath=file_.path) +def _fix_dataset_file_source_and_url(): + for dataset in get_project_datasets(): + for file in dataset.files: + file.source = file.url + file.url = generate_dataset_file_url(filepath=file.path) - if file_.source: - file_.source = file_.source.replace("file://", "") + if file.source: + file.source = file.source.replace("file://", "") - if file_.based_on: - file_.based_on.source = file_.based_on.url + if file.based_on: + file.based_on.source = file.based_on.url dataset.to_yaml() diff --git a/renku/core/migration/m_0008__dataset_metadata.py b/renku/core/migration/m_0008__dataset_metadata.py index 8070b482d0..55f3c15dcb 100644 --- a/renku/core/migration/m_0008__dataset_metadata.py +++ b/renku/core/migration/m_0008__dataset_metadata.py @@ -17,16 +17,16 @@ # limitations under the License. """Dataset metadata migrations.""" -from renku.core.migration.models.v8 import get_client_datasets +from renku.core.migration.models.v8 import get_project_datasets -def migrate(migration_context): +def migrate(_): """Migration function.""" - _fix_dataset_metadata(migration_context.client) + _fix_dataset_metadata() -def _fix_dataset_metadata(client): - for dataset in get_client_datasets(client): +def _fix_dataset_metadata(): + for dataset in get_project_datasets(): dataset.files = _get_unique_files(dataset.files) dataset.to_yaml() diff --git a/renku/core/migration/m_0009__new_metadata_storage.py b/renku/core/migration/m_0009__new_metadata_storage.py index 8d350060d7..771ab3d25c 100644 --- a/renku/core/migration/m_0009__new_metadata_storage.py +++ b/renku/core/migration/m_0009__new_metadata_storage.py @@ -25,23 +25,20 @@ from hashlib import sha1 from itertools import chain from pathlib import Path, PurePosixPath -from typing import List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union from urllib.parse import urlparse -import renku.core.management.migrate from renku.command.command_builder import inject from renku.core import errors from renku.core.dataset.datasets_provenance import DatasetsProvenance from renku.core.interface.activity_gateway import IActivityGateway -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.interface.database_gateway import IDatabaseGateway from renku.core.interface.project_gateway import IProjectGateway -from renku.core.management.client import LocalClient from renku.core.migration.models import v9 as old_schema +from renku.core.migration.models.migration import DatasetMigrationContext, MigrationContext, MigrationType from renku.core.migration.utils import ( OLD_DATASETS_PATH, OLD_METADATA_PATH, - MigrationType, read_project_version_from_yaml, set_temporary_datasets_path, unset_temporary_datasets_path, @@ -63,25 +60,31 @@ MappedIOStream, ) from renku.domain_model.workflow.plan import Plan -from renku.infrastructure.repository import Commit -PLAN_CACHE = {} +if TYPE_CHECKING: + from 
renku.infrastructure.repository import Commit -def migrate(migration_context): +def migrate(migration_context: MigrationContext): """Migration function.""" - client = migration_context.client repository = project_context.repository - committed = _commit_previous_changes(client) + committed = _commit_previous_changes() # NOTE: Initialize submodules _ = repository.submodules - _generate_new_metadata( - committed=committed, - strict=migration_context.options.strict, - migration_type=migration_context.options.type, - preserve_identifiers=migration_context.options.preserve_identifiers, - ) - _remove_dataset_metadata_files(client) + + migration_context.plan_cache = {} + try: + _generate_new_metadata( + migration_context=migration_context, + committed=committed, + strict=migration_context.options.strict, + migration_type=migration_context.options.type, + preserve_identifiers=migration_context.options.preserve_identifiers, + ) + finally: + migration_context.plan_cache = None + + _remove_dataset_metadata_files() metadata_path = project_context.metadata_path.joinpath(OLD_METADATA_PATH) with open(metadata_path, "w") as f: @@ -91,11 +94,11 @@ def migrate(migration_context): ) -def _commit_previous_changes(client): +def _commit_previous_changes(): repository = project_context.repository if repository.is_dirty(): project_path = project_context.metadata_path.joinpath(OLD_METADATA_PATH) - project = old_schema.Project.from_yaml(project_path, client) + project = old_schema.Project.from_yaml(project_path) project.version = "8" project.to_yaml(project_context.metadata_path.joinpath(project_path)) @@ -109,12 +112,12 @@ def _commit_previous_changes(client): @inject.autoparams() -def maybe_migrate_project_to_database(client, project_gateway: IProjectGateway): +def _maybe_migrate_project_to_database(project_gateway: IProjectGateway): """Migrate project to database if necessary.""" metadata_path = project_context.metadata_path.joinpath(OLD_METADATA_PATH) if metadata_path.exists(): - old_project = old_schema.Project.from_yaml(metadata_path, client=client) + old_project = old_schema.Project.from_yaml(metadata_path) id_path = urlparse(old_project._id).path id_path = id_path.replace("/projects/", "") @@ -141,7 +144,7 @@ def maybe_migrate_project_to_database(client, project_gateway: IProjectGateway): project_gateway.update_project(new_project) -def remove_graph_files(client): +def _remove_graph_files(): """Remove all graph files.""" # NOTE: These are required for projects that have new graph files try: @@ -162,11 +165,11 @@ def remove_graph_files(client): pass -@inject.autoparams() +@inject.autoparams("database_gateway", "activity_gateway") def _generate_new_metadata( - strict, + migration_context: MigrationContext, + strict: bool, migration_type: MigrationType, - client_dispatcher: IClientDispatcher, database_gateway: IDatabaseGateway, activity_gateway: IActivityGateway, force=True, @@ -175,17 +178,16 @@ def _generate_new_metadata( preserve_identifiers=False, ): """Generate graph and dataset provenance metadata.""" - client = client_dispatcher.current_client repository = project_context.repository if force: - remove_graph_files(client) + _remove_graph_files() elif has_graph_files(): raise errors.OperationError("Graph metadata exists.") database_gateway.initialize() - maybe_migrate_project_to_database(client) + _maybe_migrate_project_to_database() datasets_provenance = DatasetsProvenance() @@ -205,9 +207,11 @@ def _generate_new_metadata( try: # NOTE: Don't migrate workflows for dataset-only migrations if 
MigrationType.WORKFLOWS in migration_type: - _process_workflows(activity_gateway=activity_gateway, commit=commit, remove=remove, client=client) + _process_workflows( + migration_context=migration_context, activity_gateway=activity_gateway, commit=commit, remove=remove + ) _process_datasets( - client=client, + migration_context=migration_context, commit=commit, datasets_provenance=datasets_provenance, is_last_commit=is_last_commit, @@ -230,7 +234,7 @@ def _generate_new_metadata( database_gateway.commit() -def _convert_run_to_plan(run: old_schema.Run, project_id) -> Plan: +def _convert_run_to_plan(run: old_schema.Run, migration_context: MigrationContext, project_id) -> Plan: """Create a Plan from a Run.""" assert not run.subprocesses, f"Cannot create a Plan from a Run with subprocesses: {run._id}" @@ -238,12 +242,12 @@ def extract_run_uuid(run_id: str) -> str: # https://localhost/runs/723fd784-9347-4081-84de-a6dbb067545b/ return run_id.rstrip("/").rsplit("/", maxsplit=1)[-1] - uuid = extract_run_uuid(run._id) + run_uuid = extract_run_uuid(run._id) - if uuid in PLAN_CACHE: - return PLAN_CACHE[uuid] + if run_uuid in migration_context.plan_cache: + return migration_context.plan_cache[run_uuid] - plan_id = Plan.generate_id(uuid=uuid) + plan_id = Plan.generate_id(uuid=run_uuid) def get_mime_type(entity: Union[old_schema.Entity, old_schema.Collection]) -> List[str]: return [DIRECTORY_MIME_TYPE] if isinstance(entity, old_schema.Collection) else ["application/octet-stream"] @@ -316,7 +320,7 @@ def convert_output(output: old_schema.CommandOutput) -> CommandOutput: success_codes=run.successcodes, ) - PLAN_CACHE[uuid] = plan + migration_context.plan_cache[run_uuid] = plan return plan @@ -341,7 +345,9 @@ def _get_process_runs(workflow_run: old_schema.WorkflowRun) -> List[old_schema.P return activities -def _process_workflows(client: LocalClient, activity_gateway: IActivityGateway, commit: Commit, remove: bool): +def _process_workflows( + migration_context: MigrationContext, activity_gateway: IActivityGateway, commit: "Commit", remove: bool +): for file in commit.get_changes(paths=f"{project_context.metadata_path}/workflow/*.yaml"): if file.deleted: @@ -356,7 +362,7 @@ def _process_workflows(client: LocalClient, activity_gateway: IActivityGateway, communication.warn(f"Workflow file does not exists: '{path}'") continue - workflow = old_schema.Activity.from_yaml(path=path, client=client) + workflow = old_schema.Activity.from_yaml(path=path) if isinstance(workflow, old_schema.WorkflowRun): activities = _get_process_runs(workflow) @@ -364,7 +370,7 @@ def _process_workflows(client: LocalClient, activity_gateway: IActivityGateway, activities = [workflow] for old_activity in activities: - new_activities = _process_run_to_new_activity(process_run=old_activity) + new_activities = _process_run_to_new_activity(migration_context=migration_context, process_run=old_activity) for new_activity in new_activities: activity_gateway.add(new_activity) @@ -375,7 +381,9 @@ def _process_workflows(client: LocalClient, activity_gateway: IActivityGateway, pass -def _process_run_to_new_activity(process_run: old_schema.ProcessRun) -> List[Activity]: +def _process_run_to_new_activity( + migration_context: MigrationContext, process_run: old_schema.ProcessRun +) -> List[Activity]: """Convert a ProcessRun to a new Activity.""" def generate_activity_id(process_run_id: str, suffix: str = None) -> str: @@ -409,7 +417,7 @@ def generate_activity_id(process_run_id: str, suffix: str = None) -> str: activities = [] for i, run in 
enumerate(runs): activity_id = generate_activity_id(process_run._id, suffix=str(i) if i else None) - plan = _convert_run_to_plan(run, project_id=project_id) + plan = _convert_run_to_plan(run=run, migration_context=migration_context, project_id=project_id) agents = [_old_agent_to_new_agent(a) for a in process_run.agents or []] association_agent = _old_agent_to_new_agent(process_run.association.agent) @@ -626,7 +634,11 @@ def _old_agent_to_new_agent( def _process_datasets( - client: LocalClient, commit: Commit, datasets_provenance: DatasetsProvenance, is_last_commit, preserve_identifiers + migration_context: MigrationContext, + commit: "Commit", + datasets_provenance: DatasetsProvenance, + is_last_commit, + preserve_identifiers, ): changes = commit.get_changes(paths=".renku/datasets/*/*.yml") changed_paths = [c.b_path for c in changes if not c.deleted] @@ -634,8 +646,8 @@ def _process_datasets( deleted_paths = [c.a_path for c in changes if c.deleted] deleted_paths = [p for p in deleted_paths if len(Path(p).parents) == 4] - datasets, deleted_datasets = _fetch_datasets( - client=client, revision=commit.hexsha, paths=paths, deleted_paths=deleted_paths + datasets, deleted_datasets = fetch_datasets( + migration_context=migration_context, revision=commit.hexsha, paths=paths, deleted_paths=deleted_paths ) revision = commit.hexsha @@ -663,17 +675,21 @@ def _process_datasets( ) -def _fetch_datasets(client: LocalClient, revision: str, paths: List[str], deleted_paths: List[str]): +def fetch_datasets( + migration_context: MigrationContext, + revision: str, + paths: List[str], + deleted_paths: List[str], +): + """Fetch a dataset from a given revision.""" from renku.core.migration.models.v9 import Dataset repository = project_context.repository - datasets_path = project_context.path / ".renku" / "tmp" / OLD_DATASETS_PATH + datasets_path = repository.path / ".renku" / "tmp" / OLD_DATASETS_PATH shutil.rmtree(datasets_path, ignore_errors=True) datasets_path.mkdir(parents=True, exist_ok=True) - client.dataset_migration_context = _DatasetMigrationContext(repository=repository, revision=revision) - def read_project_version(): """Read project version at revision.""" try: @@ -722,20 +738,27 @@ def copy_and_migrate_datasets(): else: existing.append(new_path) + migration_context.dataset_migration_context = DatasetMigrationContext(repository=repository, revision=revision) + try: + from renku.core.migration.migrate import migrate_project + project_version = read_project_version() set_temporary_datasets_path(datasets_path) communication.disable() - renku.core.management.migrate.migrate( - project_version=project_version, - skip_template_update=True, - skip_docker_update=True, - max_version=8, - migration_type=MigrationType.DATASETS, - ) + + with project_context.with_path(repository.path): + migrate_project( + project_version=project_version, + skip_template_update=True, + skip_docker_update=True, + max_version=8, + migration_type=MigrationType.DATASETS, + ) finally: communication.enable() unset_temporary_datasets_path() + migration_context.dataset_migration_context = None return existing, deleted @@ -743,7 +766,7 @@ def copy_and_migrate_datasets(): datasets = [] for metadata_path in paths: - dataset = Dataset.from_yaml(metadata_path, client) + dataset = Dataset.from_yaml(metadata_path) # NOTE: Fixing dataset path after migration initial_identifier = Path(dataset.path).name dataset.path = f".renku/datasets/{initial_identifier}" @@ -751,7 +774,7 @@ def copy_and_migrate_datasets(): deleted_datasets = [] for metadata_path 
in deleted_paths: - dataset = Dataset.from_yaml(metadata_path, client) + dataset = Dataset.from_yaml(metadata_path) # NOTE: Fixing dataset path after migration initial_identifier = Path(dataset.path).name dataset.path = f".renku/datasets/{initial_identifier}" @@ -760,34 +783,8 @@ def copy_and_migrate_datasets(): return datasets, deleted_datasets -class _DatasetMigrationContext: - def __init__(self, repository, revision): - self.repository = repository - self.revision = revision - - def exists(self, path) -> bool: - try: - self.repository.run_git_command("cat-file", "-e", f"{self.revision}:{path}") - except errors.GitCommandError: - return False - else: - return True - - def is_dir(self, path) -> bool: - try: - result = self.repository.run_git_command("cat-file", "-t", f"{self.revision}:{path}") - except errors.GitCommandError: - return False - else: - return "tree" in result - - def get_previous_commit(self, path): - return self.repository.get_previous_commit(path, revision=self.revision) - - -def _remove_dataset_metadata_files(client: LocalClient): +def _remove_dataset_metadata_files(): """Remove old dataset metadata.""" - try: shutil.rmtree(os.path.join(project_context.metadata_path, OLD_DATASETS_PATH)) except FileNotFoundError: diff --git a/renku/core/management/migrate.py b/renku/core/migration/migrate.py similarity index 82% rename from renku/core/management/migrate.py rename to renku/core/migration/migrate.py index b2ba2416f8..3a164ae33c 100644 --- a/renku/core/management/migrate.py +++ b/renku/core/migration/migrate.py @@ -18,13 +18,13 @@ """Renku migrations management. Migrations files are put in renku/core/management/migrations directory. Name -of these files has m_1234__name.py format where 1234 is the migration version +of these files has ``m_1234__name.py`` format where 1234 is the migration version and name can be any alphanumeric and underscore combination. Migration files are sorted based on their lowercase name. Each migration file must define a -public "migrate" function that accepts a client as its argument. +public ``migrate`` function that accepts a ``MigrationContext`` as its argument. When executing a migration, the migration file is imported as a module and the -"migrate" function is executed. Migration version is checked against the Renku +``migrate`` function is executed. Migration version is checked against the Renku project version and any migration which has a higher version is applied to the project. 
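+
+For example, a hypothetical migration file ``m_9999__example.py`` would have this
+shape (the name and body are illustrative, not part of the codebase)::
+
+    def migrate(migration_context):
+        # Transform the project's metadata here; migrations that need nothing
+        # from the context conventionally accept it as ``_``.
+        ...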
""" @@ -45,16 +45,9 @@ ProjectNotSupported, TemplateUpdateError, ) -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.interface.project_gateway import IProjectGateway -from renku.core.migration.utils import ( - OLD_METADATA_PATH, - MigrationContext, - MigrationOptions, - MigrationType, - is_using_temporary_datasets_path, - read_project_version, -) +from renku.core.migration.models.migration import MigrationContext, MigrationType +from renku.core.migration.utils import OLD_METADATA_PATH, is_using_temporary_datasets_path, read_project_version from renku.core.util import communication from renku.domain_model.project_context import project_context @@ -89,9 +82,8 @@ def is_docker_update_possible(): return _update_dockerfile(check_only=True) -@inject.autoparams() -def migrate( - client_dispatcher: IClientDispatcher, +@inject.autoparams("project_gateway") +def migrate_project( project_gateway: IProjectGateway, force_template_update=False, skip_template_update=False, @@ -100,17 +92,33 @@ def migrate( project_version=None, max_version=None, strict=False, - migration_type=MigrationType.ALL, + migration_type: MigrationType = MigrationType.ALL, preserve_identifiers=False, ): - """Apply all migration files to the project.""" - client = client_dispatcher.current_client + """Migrate all project's entities. + + NOTE: The project path must be pushed to the project_context before calling this function. + + Args: + force_template_update: Whether to force update the template (Default value = False). + skip_template_update: Whether to skip updating the template (Default value = False). + skip_docker_update: Whether to skip updating the Dockerfile (Default value = False). + skip_migrations: Whether to skip migrating project metadata (Default value = False). + project_version: Starting migration version (Default value = False). + max_version: Apply migration up to the given version (Default value = False). + strict: Whether to fail on errors (Default value = False). + migration_type(MigrationType): Type of migration to perform (Default value = MigrationType.ALL). + preserve_identifiers: Whether to preserve ids when migrating metadata (Default value = False). + + Returns: + Dictionary of project migration status. 
+ """ template_updated = docker_updated = False if not is_renku_project(): return False, template_updated, docker_updated try: - project = project_gateway.get_project() + project = project_context.project except ValueError: project = None @@ -136,8 +144,7 @@ def migrate( project_version = project_version or get_project_version() n_migrations_executed = 0 - migration_options = MigrationOptions(strict=strict, type=migration_type, preserve_identifiers=preserve_identifiers) - migration_context = MigrationContext(client=client, options=migration_options) + migration_context = MigrationContext(strict=strict, type=migration_type, preserve_identifiers=preserve_identifiers) version = 1 for version, path in get_migrations(): @@ -154,7 +161,6 @@ def migrate( n_migrations_executed += 1 if not is_using_temporary_datasets_path(): if n_migrations_executed > 0: - project_context.reset_project() # NOTE: force reloading of project metadata project_context.project.version = str(version) project_gateway.update_project(project_context.project) diff --git a/renku/core/migration/models/migration.py b/renku/core/migration/models/migration.py new file mode 100644 index 0000000000..a0667654dc --- /dev/null +++ b/renku/core/migration/models/migration.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2022 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Migration utility functions.""" + +import os +from collections import defaultdict +from enum import IntFlag +from typing import Any, Dict, NamedTuple, Optional + +from renku.core import errors +from renku.core.constant import RENKU_HOME +from renku.core.util import communication +from renku.infrastructure.repository import Repository, git_unicode_unescape + + +class MigrationType(IntFlag): + """Type of migration that is being executed.""" + + DATASETS = 1 + WORKFLOWS = 2 + STRUCTURAL = 4 + ALL = DATASETS | WORKFLOWS | STRUCTURAL + + +class MigrationOptions(NamedTuple): + """Migration options.""" + + strict: bool + preserve_identifiers: bool + type: MigrationType = MigrationType.ALL + + +class MigrationContext: + """Context containing required migration information.""" + + def __init__(self, strict: bool, type: MigrationType, preserve_identifiers: bool): + self.options = MigrationOptions(strict=strict, type=type, preserve_identifiers=preserve_identifiers) + self.dataset_migration_context: Optional[DatasetMigrationContext] = None + self.cache: Optional[RepositoryCache] = None + self.cwl_cache: Optional[Dict[str, Any]] = None + self.plan_cache: Optional[Dict[str, Any]] = None + + +class DatasetMigrationContext: + """The context to get path info in a specific revision.""" + + def __init__(self, repository, revision): + self.repository = repository + self.revision = revision + + def exists(self, path) -> bool: + """If a given path existed at the revision.""" + try: + self.repository.run_git_command("cat-file", "-e", f"{self.revision}:{path}") + except errors.GitCommandError: + return False + else: + return True + + def is_dir(self, path) -> bool: + """If a given path was a directory at the revision.""" + try: + result = self.repository.run_git_command("cat-file", "-t", f"{self.revision}:{path}") + except errors.GitCommandError: + return False + else: + return "tree" in result + + def get_previous_commit(self, path): + """Get previous commit of a given path starting from the revision.""" + return self.repository.get_previous_commit(path, revision=self.revision) + + +class RepositoryCache: + """Cache for a git repository.""" + + def __init__(self, repository: "Repository", cache, cwl_files_commits): + self.repository = repository + self.cache = cache + self.cwl_files_commits = cwl_files_commits + + @classmethod + def from_repository(cls, repository: "Repository"): + """Return a cached repository.""" + cache = defaultdict(list) + cwl_files_commits_map = {} + + for n, commit in enumerate(repository.iterate_commits(full_history=True), start=1): + communication.echo(f"Caching commit {n}", end="\r") + + cwl_files = [] + for file in commit.get_changes(): + # Ignore deleted files + if file.deleted: + continue + + path = file.b_path + cache[path].append(commit) + + if path.startswith(f"{RENKU_HOME}/workflow/") and path.endswith(".cwl"): + cwl_files.append(os.path.realpath(repository.path / path)) + + cls._update_cwl_files_and_commits(commit, cwl_files_commits_map, cwl_files) + + communication.echo(40 * " ", end="\r") + + return RepositoryCache(repository, cache, cwl_files_commits_map) + + @staticmethod + def _update_cwl_files_and_commits(commit, cwl_files_commits_map, cwl_files): + if len(cwl_files) != 1: + return + + path = cwl_files[0] + existing_commit = cwl_files_commits_map.get(path) + + if existing_commit is None: + cwl_files_commits_map[path] = commit + elif existing_commit.compare_to(commit) < 0: # existing commit is older + cwl_files_commits_map[path] = commit + + def 
find_previous_commit(self, path, revision="HEAD"): + """Return a previous commit for a given path starting from 'revision'.""" + + def find_from_repository(path, revision): + try: + return self.repository.get_previous_commit(path=path, revision=revision, full_history=True) + except errors.GitCommitNotFoundError: + communication.warn(f"Cannot find previous commit for {path} from {str(revision)}") + return revision + + try: + path = (self.repository.path / path).relative_to(self.repository.path) + except ValueError: + pass + path = str(path) + + if revision == "HEAD": + revision = self.repository.head.commit + + commits = self.cache.get(git_unicode_unescape(path)) + if not commits: + return find_from_repository(path, revision) + + if revision in commits: + return revision + + for commit in commits: + if commit.compare_to(revision) <= 0: + return commit + + # No commit was found + return find_from_repository(path, revision) diff --git a/renku/core/migration/models/refs.py b/renku/core/migration/models/refs.py index 81df3dc41a..cab5a6af3c 100644 --- a/renku/core/migration/models/refs.py +++ b/renku/core/migration/models/refs.py @@ -66,7 +66,7 @@ def path(self): @property def reference(self): - """Return the path we point to relative to the client.""" + """Return the path we point to.""" return self.path.resolve() def delete(self): diff --git a/renku/core/migration/models/v3.py b/renku/core/migration/models/v3.py index 7ebef979de..d7d1ee3134 100644 --- a/renku/core/migration/models/v3.py +++ b/renku/core/migration/models/v3.py @@ -35,8 +35,6 @@ class Base: def __init__(self, **kwargs): """Initialize an instance.""" - self.client = None - kwargs.setdefault("_id", None) for k, v in kwargs.items(): @@ -51,11 +49,11 @@ class Person(Base): name = None @classmethod - def from_repository(cls, repository, client=None): + def from_repository(cls, repository): """Create an instance from a repository.""" user = repository.get_user() instance = cls(name=user.name, email=user.email) - instance.fix_id(client) + instance.fix_id() return instance def __init__(self, **kwargs): @@ -70,12 +68,10 @@ def full_identity(self): affiliation = f" [{self.affiliation}]" if self.affiliation else "" return f"{self.name}{email}{affiliation}" - def fix_id(self, client=None): + def fix_id(self): """Fixes the id of a Person if it is not set.""" if not self._id or "mailto:None" in self._id or self._id.startswith("_:"): - if not client and self.client: - client = self.client - hostname = get_host(client) + hostname = get_host() self._id = OldPerson.generate_id(email=self.email, full_identity=self.full_identity, hostname=hostname) @@ -134,7 +130,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) if not self._id or self._id.startswith("_:"): - self._id = generate_dataset_tag_id(client=self.client, name=self.name, commit=self.commit) + self._id = generate_dataset_tag_id(name=self.name, commit=self.commit) class Language(Base): @@ -158,17 +154,17 @@ def __init__(self, **kwargs): self.url_str = self.url if not self._id or self._id.startswith("_:"): - self._id = generate_url_id(client=self.client, url_str=self.url_str, url_id=self.url_id) + self._id = generate_url_id(url_str=self.url_str, url_id=self.url_id) class Dataset(Base): """Dataset migration model.""" @classmethod - def from_yaml(cls, path, client=None, commit=None): + def from_yaml(cls, path, commit=None): """Read content from YAML file.""" data = yaml.read_yaml(path) - self = DatasetSchemaV3(client=client, commit=commit).load(data) + self = 
DatasetSchemaV3(commit=commit).load(data) self._metadata_path = path return self @@ -200,7 +196,7 @@ class Meta: def make_instance(self, data, **kwargs): """Transform loaded dict into corresponding object.""" instance = JsonLDSchema.make_instance(self, data, **kwargs) - instance.fix_id(client=None) + instance.fix_id() return instance @@ -378,12 +374,12 @@ def fix_files_context(self, data, **kwargs): return data -def get_client_datasets(client): - """Return Dataset migration models for a client.""" - paths = get_datasets_path(client).rglob(OLD_METADATA_PATH) +def get_project_datasets(): + """Return Dataset migration models for a project.""" + paths = get_datasets_path().rglob(OLD_METADATA_PATH) datasets = [] for path in paths: - dataset = Dataset.from_yaml(path=path, client=client) + dataset = Dataset.from_yaml(path=path) dataset.path = getattr(dataset, "path", None) or os.path.relpath(path.parent, project_context.path) datasets.append(dataset) diff --git a/renku/core/migration/models/v7.py b/renku/core/migration/models/v7.py index 247e5e6f40..7a3a65d461 100644 --- a/renku/core/migration/models/v7.py +++ b/renku/core/migration/models/v7.py @@ -32,10 +32,10 @@ class Dataset(Base): """Dataset migration model.""" @classmethod - def from_yaml(cls, path, client=None, commit=None): + def from_yaml(cls, path, commit=None): """Read content from YAML file.""" data = yaml.read_yaml(path) - self = DatasetSchemaV7(client=client, commit=commit).load(data) + self = DatasetSchemaV7(commit=commit).load(data) self._metadata_path = path return self @@ -67,7 +67,7 @@ class Meta: files = fields.Nested(schema.hasPart, DatasetFileSchemaV7, many=True) -def get_client_datasets(client): - """Return Dataset migration models for a client.""" - paths = get_datasets_path(client).rglob(OLD_METADATA_PATH) - return [Dataset.from_yaml(path=path, client=client) for path in paths] +def get_project_datasets(): + """Return Dataset migration models for a project.""" + paths = get_datasets_path().rglob(OLD_METADATA_PATH) + return [Dataset.from_yaml(path=path) for path in paths] diff --git a/renku/core/migration/models/v8.py b/renku/core/migration/models/v8.py index 45bac34ffe..272115b134 100644 --- a/renku/core/migration/models/v8.py +++ b/renku/core/migration/models/v8.py @@ -26,6 +26,7 @@ from renku.core.migration.models.v9 import generate_file_id from renku.core.migration.utils import OLD_METADATA_PATH, get_datasets_path from renku.core.util import yaml +from renku.domain_model.project_context import project_context from .v3 import CreatorMixinSchemaV3, DatasetTagSchemaV3, EntitySchemaV3, LanguageSchemaV3, PersonSchemaV3, UrlSchemaV3 from .v7 import Base, DatasetFileSchemaV7 @@ -40,29 +41,29 @@ def __init__(self, **kwargs): if hasattr(self, "path") and (not self._id or self._id.startswith("_:")): hexsha = "UNCOMMITTED" - if self.client and Path(self.path).exists(): - commit = self.client.repository.get_previous_commit(self.path) + if project_context.has_context() and Path(self.path).exists(): + commit = project_context.repository.get_previous_commit(self.path) if commit: hexsha = commit.hexsha - self._id = generate_file_id(client=self.client, hexsha=hexsha, path=self.path) + self._id = generate_file_id(hexsha=hexsha, path=self.path) class Dataset(Base): """Dataset migration model.""" @classmethod - def from_yaml(cls, path, client=None, commit=None): + def from_yaml(cls, path, commit=None): """Read content from YAML file.""" data = yaml.read_yaml(path) - self = DatasetSchemaV8(client=client, commit=commit, 
flattened=True).load(data) + self = DatasetSchemaV8(commit=commit, flattened=True).load(data) self._metadata_path = path return self def to_yaml(self, path=None): """Write content to a YAML file.""" - for file_ in self.files: - file_._project = self._project + for file in self.files: + file._project = self._project data = DatasetSchemaV8(flattened=True).dump(self) path = path or self._metadata_path or os.path.join(self.path, OLD_METADATA_PATH) @@ -116,7 +117,7 @@ def fix_license(self, data, **kwargs): return data -def get_client_datasets(client): - """Return Dataset migration models for a client.""" - paths = get_datasets_path(client).rglob(OLD_METADATA_PATH) - return [Dataset.from_yaml(path=path, client=client) for path in paths] +def get_project_datasets(): + """Return Dataset migration models for a project.""" + paths = get_datasets_path().rglob(OLD_METADATA_PATH) + return [Dataset.from_yaml(path=path) for path in paths] diff --git a/renku/core/migration/models/v9.py b/renku/core/migration/models/v9.py index dcde7a28a4..40b4febd44 100644 --- a/renku/core/migration/models/v9.py +++ b/renku/core/migration/models/v9.py @@ -28,13 +28,13 @@ from copy import copy from functools import total_ordering from pathlib import Path +from typing import Type, Union from urllib.parse import quote, urljoin, urlparse import attr from attr.validators import instance_of from marshmallow import EXCLUDE, pre_dump -from renku.command.command_builder.command import inject from renku.command.schema.annotation import AnnotationSchema from renku.command.schema.calamus import ( DateTimeList, @@ -51,8 +51,7 @@ ) from renku.command.schema.project import ProjectSchema as NewProjectSchema from renku.core import errors -from renku.core.interface.client_dispatcher import IClientDispatcher -from renku.core.management.migrate import SUPPORTED_PROJECT_VERSION +from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION from renku.core.migration.models.refs import LinkReference from renku.core.migration.utils import ( OLD_METADATA_PATH, @@ -64,7 +63,6 @@ ) from renku.core.util import yaml as yaml from renku.core.util.datetime8601 import fix_datetime, parse_date -from renku.core.util.dispatcher import get_client from renku.core.util.doi import extract_doi, is_doi from renku.core.util.git import get_in_submodules from renku.core.util.urls import get_host, get_slug @@ -78,20 +76,16 @@ RANDOM_ID_LENGTH = 4 -def _set_entity_client_commit(entity, client, commit): - """Set the client and commit of an entity.""" - if client and not entity.client: - entity.client = client - +def _set_entity_commit(entity, commit): + """Set the commit of an entity.""" if not entity.commit: revision = "UNCOMMITTED" if entity._label: revision = entity._label.rsplit("@", maxsplit=1)[-1] if revision == "UNCOMMITTED": commit = commit - elif client: - repository = project_context.repository - commit = repository.get_commit(revision) + elif project_context.has_context(): + commit = project_context.repository.get_commit(revision) entity.commit = commit @@ -100,8 +94,7 @@ def _str_or_none(data): return str(data) if data is not None else data -@inject.autoparams("client_dispatcher") -def generate_project_id(name, creator, client_dispatcher: IClientDispatcher): +def generate_project_id(name, creator): """Return the id for the project based on the repository origin remote.""" # Determine the hostname for the resource URIs. 
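The hunks that follow all apply one substitution: code that previously checked for an injected client now asks the global ``project_context`` whether a project is active. A minimal sketch of the pattern, assuming a hypothetical ``resolve_host`` helper (not part of the patch):

    from renku.domain_model.project_context import project_context

    def resolve_host(default_host):
        # Prefer the active project's remote host; otherwise keep the default.
        if project_context.has_context():
            remote = project_context.remote
            return remote.host or default_host
        return default_host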
@@ -114,9 +107,7 @@ def generate_project_id(name, creator, client_dispatcher: IClientDispatcher): owner = creator.email.split("@")[0] - client = client_dispatcher.current_client - - if client: + if project_context.has_context(): remote = project_context.remote host = remote.host or host owner = remote.owner or owner @@ -183,13 +174,16 @@ def __attrs_post_init__(self): self._id = self.project_id except ValueError: """Fallback to old behaviour.""" - client = get_client() - if self._id: - pass - elif client and getattr(client, "project", None): - self._id = project_context.project._id - else: - raise + if not self._id: + try: + self._id = project_context.project.id + except ValueError: + metadata_path = project_context.metadata_path.joinpath(OLD_METADATA_PATH) + self._id = Project.from_yaml(metadata_path)._id + except errors.ConfigurationError: + pass + if not self._id: + raise @property def project_id(self): @@ -197,23 +191,23 @@ def project_id(self): return generate_project_id(name=self.name, creator=self.creator) @classmethod - def from_yaml(cls, path, client=None): + def from_yaml(cls, path): """Return an instance from a YAML file.""" data = yaml.read_yaml(path) - self = cls.from_jsonld(data=data, client=client) + self = cls.from_jsonld(data=data) self._metadata_path = path return self @classmethod - def from_jsonld(cls, data, client=None): + def from_jsonld(cls, data): """Create an instance from JSON-LD data.""" if isinstance(data, cls): return data if not isinstance(data, dict): raise ValueError(data) - return ProjectSchema(client=client).load(data) + return ProjectSchema().load(data) def to_yaml(self, path=None): """Write an instance to the referenced YAML file.""" @@ -231,7 +225,6 @@ class CommitMixin: """Represent a commit mixin.""" commit = attr.ib(default=None, kw_only=True) - client = attr.ib(default=None, kw_only=True) path = attr.ib(default=None, kw_only=True, converter=_str_or_none) _id = attr.ib(default=None, kw_only=True) @@ -241,7 +234,7 @@ class CommitMixin: def default_id(self): """Configure calculated ID.""" hexsha = self.commit.hexsha if self.commit else "UNCOMMITTED" - return generate_file_id(client=self.client, hexsha=hexsha, path=self.path) + return generate_file_id(hexsha=hexsha, path=self.path) @_label.default def default_label(self): @@ -252,25 +245,25 @@ def default_label(self): hexsha = "UNCOMMITTED" if self.path: path = self.path - if self.client and os.path.isabs(path): + if project_context.has_context() and os.path.isabs(path): path = pathlib.Path(path).relative_to(project_context.path) return generate_label(path, hexsha) return hexsha def __attrs_post_init__(self): """Post-init hook.""" - if self.path and self.client: + if self.path and project_context.has_context(): path = pathlib.Path(self.path) if path.is_absolute(): self.path = str(path.relative_to(project_context.path)) # always force "project" to be the current project - if self.client: + if project_context.has_context(): try: self._project = project_context.project except ValueError: metadata_path = project_context.metadata_path.joinpath(OLD_METADATA_PATH) - self._project = Project.from_yaml(metadata_path, client=self.client) + self._project = Project.from_yaml(metadata_path) if not self._id: self._id = self.default_id() @@ -287,7 +280,7 @@ class Entity(CommitMixin): checksum = attr.ib(default=None, kw_only=True, type=str) @classmethod - def from_revision(cls, client, path, revision="HEAD", parent=None, find_previous=True, **kwargs): + def from_revision(cls, path, revision: Union[str, Commit] = "HEAD", 
parent=None, find_previous=True, **kwargs): """Return dependency from given path and revision.""" repository = project_context.repository @@ -298,11 +291,11 @@ def from_revision(cls, client, path, revision="HEAD", parent=None, find_previous else: assert isinstance(revision, Commit) - client, _, commit, path = get_in_submodules(project_context.repository, revision, path) + _, commit, path = get_in_submodules(project_context.repository, revision, path) path_ = project_context.path / path if path != "." and path_.is_dir(): - entity = Collection(client=client, commit=commit, path=path, members=[], parent=parent) + entity = Collection(commit=commit, path=path, members=[], parent=parent) files_in_commit = [c.b_path for c in commit.get_changes() if not c.deleted] @@ -323,7 +316,6 @@ def from_revision(cls, client, path, revision="HEAD", parent=None, find_previous entity.members.append( cls.from_revision( - client=client, path=member_path, revision=commit, parent=entity, @@ -335,7 +327,7 @@ def from_revision(cls, client, path, revision="HEAD", parent=None, find_previous pass else: - entity = cls(client=client, commit=commit, path=str(path), parent=parent, **kwargs) + entity = cls(commit=commit, path=str(path), parent=parent, **kwargs) return entity @@ -347,7 +339,7 @@ def parent(self): # pragma: no cover @property def entities(self): """Yield itself.""" - if self.client and not self.commit and self._label and "@UNCOMMITTED" not in self._label: + if project_context.has_context() and not self.commit and self._label and "@UNCOMMITTED" not in self._label: repository = project_context.repository self.commit = repository.get_commit(self._label.rsplit("@", maxsplit=1)[-1]) @@ -372,7 +364,7 @@ def __attrs_post_init__(self): def default_members(self): """Generate default members as entities from current path.""" - if not self.client: + if not project_context.has_context(): return [] dir_path = project_context.path / self.path @@ -386,26 +378,17 @@ def default_members(self): for path in dir_path.iterdir(): if path.name == ".gitkeep": continue # ignore empty directories in Git repository - cls = Collection if path.is_dir() else Entity - members.append( - cls( - commit=self.commit, - client=self.client, - path=str(path.relative_to(project_context.path)), - parent=self, - ) - ) + cls: Type = Collection if path.is_dir() else Entity + members.append(cls(commit=self.commit, path=str(path.relative_to(project_context.path)), parent=self)) return members @property def entities(self): """Recursively return all files.""" for member in self.members: - if not member.client and self.client: - member.client = self.client yield from member.entities - if self.client and not self.commit and self._label and "@UNCOMMITTED" not in self._label: + if project_context.has_context() and not self.commit and self._label and "@UNCOMMITTED" not in self._label: repository = project_context.repository self.commit = repository.get_commit(self._label.rsplit("@", maxsplit=1)[-1]) @@ -416,8 +399,6 @@ def entities(self): class MappedIOStream(object): """Represents an IO stream (``stdin``, ``stdout``, ``stderr``).""" - client = attr.ib(default=None, kw_only=True) - _id = attr.ib(default=None, kw_only=True) _label = attr.ib(default=None, kw_only=True) @@ -428,7 +409,7 @@ class MappedIOStream(object): def default_id(self): """Generate an id for a mapped stream.""" host = "localhost" - if self.client: + if project_context.has_context(): host = project_context.remote.host or host host = os.environ.get("RENKU_DOMAIN") or host @@ -634,11 +615,10 @@ 
class Run(CommitMixin): _activity = attr.ib(kw_only=True, default=None) @staticmethod - def generate_id(client, identifier=None): + def generate_id(identifier=None): """Generate an id for an argument.""" host = "localhost" - if client: - host = project_context.remote.host or host + host = project_context.remote.host or host host = os.environ.get("RENKU_DOMAIN") or host if not identifier: @@ -653,13 +633,13 @@ def __lt__(self, other): for i in other.inputs: entity = i.consumes - for subentity in entity.entities: - a_inputs.add(subentity.path) + for sub_entity in entity.entities: + a_inputs.add(sub_entity.path) for i in self.outputs: entity = i.produces - for subentity in entity.entities: - b_outputs.add(subentity.path) + for sub_entity in entity.entities: + b_outputs.add(sub_entity.path) return a_inputs & b_outputs @@ -824,17 +804,17 @@ class Activity(CommitMixin): _metadata_path = attr.ib(default=None, init=False) @classmethod - def from_yaml(cls, path, client=None, commit=None): + def from_yaml(cls, path, commit=None): """Return an instance from a YAML file.""" data = yaml.read_yaml(path) - self = cls.from_jsonld(data=data, client=client, commit=commit) + self = cls.from_jsonld(data=data, commit=commit) self._metadata_path = path return self @classmethod - def from_jsonld(cls, data, client=None, commit=None): + def from_jsonld(cls, data, commit=None): """Create an instance from JSON-LD data.""" if isinstance(data, cls): return data @@ -848,7 +828,7 @@ def from_jsonld(cls, data, client=None, commit=None): elif any(str(wfprov.ProcessRun) in d["@type"] for d in data): schema = ProcessRunSchema - return schema(client=client, commit=commit, flattened=True).load(data) + return schema(commit=commit, flattened=True).load(data) @_message.default def default_message(self): @@ -909,38 +889,32 @@ def __attrs_post_init__(self): super().__attrs_post_init__() repository = project_context.repository commit_not_set = not self.commit or self.commit.hexsha in self._id - if commit_not_set and self.client and Path(self.path).exists(): + if commit_not_set and Path(self.path).exists(): self.commit = repository.get_previous_commit(self.path) if self.association: self.association.plan._activity = weakref.ref(self) plan = self.association.plan if not plan.commit: - if self.client: - plan.client = self.client if self.commit: plan.commit = self.commit if plan.inputs: for i in plan.inputs: - _set_entity_client_commit(i.consumes, self.client, self.commit) + _set_entity_commit(entity=i.consumes, commit=self.commit) if plan.outputs: for o in plan.outputs: - _set_entity_client_commit(o.produces, self.client, self.commit) + _set_entity_commit(entity=o.produces, commit=self.commit) - if self.qualified_usage and self.client and self.commit: + if self.qualified_usage and self.commit: usages = [] revision = self.commit.hexsha for usage in self.qualified_usage: if not usage.commit and "@UNCOMMITTED" in usage._label: usages.append( - Usage.from_revision( - client=self.client, path=usage.path, role=usage.role, revision=revision, id=usage._id - ) + Usage.from_revision(path=usage.path, role=usage.role, revision=revision, id=usage._id) ) else: - if not usage.client: - usage.entity.set_client(self.client) if not usage.commit: revision = usage._label.rsplit("@", maxsplit=1)[-1] usage.entity.commit = repository.get_commit(revision) @@ -952,8 +926,7 @@ def __attrs_post_init__(self): def generate_id(cls, commit_hexsha): """Calculate action ID.""" host = "localhost" - if hasattr(cls, "client"): - host = project_context.remote.host or 
host + host = project_context.remote.host or host host = os.environ.get("RENKU_DOMAIN") or host return urljoin( @@ -962,7 +935,7 @@ def generate_id(cls, commit_hexsha): ) @classmethod - def from_run(cls, run, client, path, commit=None, subprocess_index=None, update_commits=False): + def from_run(cls, run, path, commit=None, subprocess_index=None, update_commits=False): """Convert a ``Run`` to a ``ProcessRun``.""" repository = project_context.repository @@ -982,7 +955,7 @@ def from_run(cls, run, client, path, commit=None, subprocess_index=None, update_ entity = input_.consumes if update_commits: commit = repository.get_previous_commit(input_path, revision=commit.hexsha) - entity = Entity.from_revision(client, input_path, commit) + entity = Entity.from_revision(input_path, commit) dependency = Usage(entity=entity, role=input_.sanitized_id, id=usage_id) @@ -1001,7 +974,6 @@ def from_run(cls, run, client, path, commit=None, subprocess_index=None, update_ id=id_, qualified_usage=usages, association=association, - client=client, commit=commit, path=path, run_parameter=run_parameter, @@ -1010,7 +982,7 @@ def from_run(cls, run, client, path, commit=None, subprocess_index=None, update_ generated = [] for output in run.outputs: - entity = Entity.from_revision(client, output.produces.path, revision=commit, parent=output.produces.parent) + entity = Entity.from_revision(output.produces.path, revision=commit, parent=output.produces.parent) generation = Generation(activity=process_run, role=output.sanitized_id, entity=entity) generated.append(generation) @@ -1044,8 +1016,6 @@ def subprocesses(self): class Url: """Represents a schema URL reference.""" - client = attr.ib(default=None) - url = attr.ib(default=None, kw_only=True) url_str = attr.ib(default=None, kw_only=True) @@ -1055,7 +1025,7 @@ class Url: def default_id(self): """Define default value for id field.""" - return generate_url_id(client=self.client, url_str=self.url_str, url_id=self.url_id) + return generate_url_id(url_str=self.url_str, url_id=self.url_id) def default_url(self): """Define default value for url field.""" @@ -1298,8 +1268,6 @@ def _extract_doi(value): class DatasetTag(object): """Represents a Tag of an instance of a dataset.""" - client = attr.ib(default=None) - name = attr.ib(default=None, kw_only=True, validator=instance_of(str)) description = attr.ib(default=None, kw_only=True, validator=instance_of(str)) @@ -1319,7 +1287,7 @@ def _now(self): def default_id(self): """Define default value for id field.""" - return generate_dataset_tag_id(client=self.client, name=self.name, commit=self.commit) + return generate_dataset_tag_id(name=self.name, commit=self.commit) def __attrs_post_init__(self): """Post-Init hook.""" @@ -1408,7 +1376,7 @@ def default_filename(self): def default_url(self): """Generate default url based on project's ID.""" - return generate_dataset_file_url(client=self.client, filepath=self.path) + return generate_dataset_file_url(filepath=self.path) @property def commit_sha(self): @@ -1418,7 +1386,7 @@ def commit_sha(self): @property def full_path(self): """Return full path in the current reference frame.""" - path = project_context.path / self.path if self.client else self.path + path = project_context.path / self.path return Path(os.path.abspath(path)) @property @@ -1441,7 +1409,7 @@ def __attrs_post_init__(self): if not parsed_id.scheme: self._id = "file://{}".format(self._id) - if not self.url and self.client: + if not self.url: self.url = self.default_url() def update_commit(self, commit): @@ -1602,12 +1570,11 
@@ def find_files(self, paths): def find_file(self, path, return_index=False): """Find a file in files container using its relative path.""" - for index, file_ in enumerate(self.files): - if str(file_.path) == str(path): + for index, file in enumerate(self.files): + if str(file.path) == str(path): if return_index: return index - file_.client = self.client - return file_ + return file def update_metadata(self, **kwargs): """Updates instance attributes.""" @@ -1639,8 +1606,6 @@ def update_files(self, files): self._modified = True self.files += new_files - self._update_files_metadata(new_files) - def unlink_file(self, path, missing_ok=False): # FIXME: Remove unused code """Unlink a file from dataset. @@ -1674,11 +1639,10 @@ def mutate(self): self.same_as = None self.derived_from = Url(url_id=self._id) - if self.client: - repository = project_context.repository - mutator = Person.from_repository(repository) - if not any(c for c in self.creators if c.email == mutator.email): - self.creators.append(mutator) + repository = project_context.repository + mutator = Person.from_repository(repository) + if not any(c for c in self.creators if c.email == mutator.email): + self.creators.append(mutator) self.date_created = self._now() self.date_published = None @@ -1693,7 +1657,7 @@ def _replace_identifier(self, new_identifier): self._label = self.identifier def _set_id(self): - self._id = generate_dataset_id(client=self.client, identifier=self.identifier) + self._id = generate_dataset_id(identifier=self.identifier) def __attrs_post_init__(self): """Post-Init hook.""" @@ -1704,7 +1668,7 @@ def __attrs_post_init__(self): self._label = self.identifier if self.derived_from: - host = get_host(self.client) + host = get_host() derived_from_id = self.derived_from._id derived_from_url = self.derived_from.url.get("@id") u = urlparse(derived_from_url) @@ -1716,60 +1680,35 @@ def __attrs_post_init__(self): if self.date_published: self.date_created = None - if not self.path and self.client: + if not self.path: absolute_path = LinkReference( metadata_path=project_context.metadata_path, name=f"datasets/{self.name}" ).reference.parent self.path = str(absolute_path.relative_to(project_context.path)) - self._update_files_metadata() - - try: - if self.client: + if project_context.has_context(): + try: revision = self.commit.hexsha if self.commit else "HEAD" repository = project_context.repository self.commit = repository.get_previous_commit(os.path.join(self.path, "metadata.yml"), revision=revision) - except errors.GitCommitNotFoundError: - pass + except errors.GitCommitNotFoundError: + pass if not self.name: self.name = generate_default_name(self.title, self.version) - def _update_files_metadata(self, files=None): - files = files or self.files - - if not files or not self.client: - return - - for file_ in files: - path = project_context.path / file_.path - file_exists = path.exists() or path.is_symlink() - - if not file_exists: - continue - - if file_.client is None: - repository = project_context.repository - client, _, _, _ = get_in_submodules( - project_context.repository, - repository.get_previous_commit(file_.path, revision="HEAD"), - file_.path, - ) - - file_.client = client - @classmethod - def from_yaml(cls, path, client=None, commit=None): + def from_yaml(cls, path, commit=None): """Return an instance from a YAML file.""" data = yaml.read_yaml(path) - self = cls.from_jsonld(data=data, client=client, commit=commit) + self = cls.from_jsonld(data=data, commit=commit) self._metadata_path = path return self 
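With ``client`` gone from ``from_yaml``, loading an old-format dataset during migration needs only the metadata path; the repository and project root are read from the active ``project_context``. A hedged usage sketch (the metadata path is hypothetical):

    from renku.core.migration.models.v9 import Dataset

    # Assumes an active project context, e.g. via project_context.with_path(...).
    dataset = Dataset.from_yaml(path=".renku/datasets/abc123/metadata.yml")  # hypothetical path
    print(dataset.name, dataset.identifier)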
@classmethod - def from_jsonld(cls, data, client=None, commit=None, schema_class=None): + def from_jsonld(cls, data, commit=None, schema_class=None): """Create an instance from JSON-LD data.""" if isinstance(data, cls): return data @@ -1777,7 +1716,7 @@ def from_jsonld(cls, data, client=None, commit=None, schema_class=None): raise ValueError(data) schema_class = schema_class or OldDatasetSchema - return schema_class(client=client, commit=commit, flattened=True).load(data) + return schema_class(commit=commit, flattened=True).load(data) def to_yaml(self, path=None, immutable=False): """Write an instance to the referenced YAML file.""" @@ -1993,7 +1932,7 @@ class Meta: added = DateTimeList(schema.dateCreated, format="iso", extra_formats=("%Y-%m-%d",)) name = fields.String(schema.name, load_default=None) url = fields.String(schema.url, load_default=None) - based_on = Nested(schema.isBasedOn, "OldDatasetFileSchema", load_default=None, propagate_client=False) + based_on = Nested(schema.isBasedOn, "OldDatasetFileSchema", load_default=None) external = fields.Boolean(renku.external, load_default=False) source = fields.String(renku.source, load_default=None) @@ -2068,10 +2007,10 @@ def fix_datetimes(self, obj, many=False, **kwargs): return obj -def get_client_datasets(client): - """Return Dataset migration models for a client.""" - paths = get_datasets_path(client).rglob(OLD_METADATA_PATH) - return [Dataset.from_yaml(path=path, client=client) for path in paths] +def get_project_datasets(): + """Return Dataset migration models for a project.""" + paths = get_datasets_path().rglob(OLD_METADATA_PATH) + return [Dataset.from_yaml(path=path) for path in paths] def generate_label(path, hexsha): @@ -2079,14 +2018,13 @@ def generate_label(path, hexsha): return f"{path}@{hexsha}" -def generate_file_id(client, hexsha, path): +def generate_file_id(hexsha, path): """Generate DatasetFile id field.""" # Determine the hostname for the resource URIs. # If RENKU_DOMAIN is set, it overrides the host from remote. # Default is localhost. 
host = "localhost" - if client: - host = project_context.remote.host or host + host = project_context.remote.host or host host = os.environ.get("RENKU_DOMAIN") or host # TODO: Use plural name for entity id: /blob/ -> /blobs/ diff --git a/renku/core/migration/utils/__init__.py b/renku/core/migration/utils/__init__.py index becf3992ee..e4df887999 100644 --- a/renku/core/migration/utils/__init__.py +++ b/renku/core/migration/utils/__init__.py @@ -21,20 +21,11 @@ import pathlib import threading import uuid -from enum import IntFlag -from typing import TYPE_CHECKING, NamedTuple from urllib.parse import ParseResult, quote, urljoin, urlparse -from renku.command.command_builder import inject -from renku.core.constant import RENKU_HOME -from renku.core.interface.project_gateway import IProjectGateway from renku.core.util.yaml import read_yaml from renku.domain_model.project_context import project_context -if TYPE_CHECKING: - from renku.core.management.client import LocalClient - - OLD_METADATA_PATH = "metadata.yml" OLD_DATASETS_PATH = "datasets" OLD_WORKFLOW_PATH = "workflow" @@ -42,31 +33,7 @@ thread_local_storage = threading.local() -class MigrationType(IntFlag): - """Type of migration that is being executed.""" - - DATASETS = 1 - WORKFLOWS = 2 - STRUCTURAL = 4 - ALL = DATASETS | WORKFLOWS | STRUCTURAL - - -class MigrationOptions(NamedTuple): - """Migration options.""" - - strict: bool - preserve_identifiers: bool - type: MigrationType = MigrationType.ALL - - -class MigrationContext(NamedTuple): - """Context containing required migration information.""" - - client: "LocalClient" - options: MigrationOptions - - -def generate_url_id(client, url_str, url_id): +def generate_url_id(url_str, url_id): """Generate @id field for Url.""" url = url_str or url_id if url: @@ -76,32 +43,32 @@ def generate_url_id(client, url_str, url_id): id_ = str(uuid.uuid4()) host = "localhost" - if client: + if project_context.has_context(): host = project_context.remote.host or host host = os.environ.get("RENKU_DOMAIN") or host return urljoin("https://{host}".format(host=host), pathlib.posixpath.join("/urls", quote(id_, safe=""))) -def generate_dataset_tag_id(client, name, commit): +def generate_dataset_tag_id(name, commit): """Generate @id field for DatasetTag.""" host = "localhost" - if client: + if project_context.has_context(): host = project_context.remote.host or host host = os.environ.get("RENKU_DOMAIN") or host name = "{0}@{1}".format(name, commit) - return urljoin("https://{host}".format(host=host), pathlib.posixpath.join("/datasettags", quote(name, safe=""))) + return urljoin("https://{host}".format(host=host), pathlib.posixpath.join("/dataset-tags", quote(name, safe=""))) -def generate_dataset_id(client, identifier): +def generate_dataset_id(identifier): """Generate @id field.""" # Determine the hostname for the resource URIs. # If RENKU_DOMAIN is set, it overrides the host from remote. # Default is localhost. 
host = "localhost" - if client: + if project_context.has_context(): host = project_context.remote.host or host host = os.environ.get("RENKU_DOMAIN") or host @@ -109,21 +76,17 @@ def generate_dataset_id(client, identifier): return urljoin(f"https://{host}", pathlib.posixpath.join("/datasets", quote(identifier, safe=""))) -@inject.autoparams() -def generate_dataset_file_url(client, filepath, project_gateway: IProjectGateway): +def generate_dataset_file_url(filepath): """Generate url for DatasetFile.""" - if not client: - return - try: - project = project_gateway.get_project() + project = project_context.project if not project: return except ValueError: from renku.core.migration.models.v9 import Project metadata_path = project_context.metadata_path.joinpath(OLD_METADATA_PATH) - project = Project.from_yaml(metadata_path, client=client) + project = Project.from_yaml(metadata_path) project_id = urlparse(project._id) else: @@ -188,12 +151,16 @@ def get_pre_0_3_4_datasets_metadata(): return [] -def read_project_version(): +def read_project_version() -> str: """Read project version from metadata file.""" try: return project_context.project.version except (NotImplementedError, ValueError): - yaml_data = read_yaml(project_context.metadata_path.joinpath(OLD_METADATA_PATH)) + metadata_path = project_context.metadata_path / OLD_METADATA_PATH + if not os.path.exists(metadata_path): + return "1" + + yaml_data = read_yaml(metadata_path) return read_project_version_from_yaml(yaml_data) @@ -204,10 +171,11 @@ def read_latest_agent(): try: return project_context.latest_agent except (NotImplementedError, ValueError): - if not os.path.exists(project_context.metadata_path.joinpath(OLD_METADATA_PATH)): + metadata_path = project_context.metadata_path / OLD_METADATA_PATH + if not os.path.exists(metadata_path): raise - yaml_data = read_yaml(project_context.metadata_path.joinpath(OLD_METADATA_PATH)) + yaml_data = read_yaml(metadata_path) jsonld = pyld.jsonld.expand(yaml_data)[0] jsonld = normalize(jsonld) return _get_jsonld_property(jsonld, "http://schema.org/agent", "pre-0.11.0") @@ -222,9 +190,9 @@ def read_project_version_from_yaml(yaml_data): return _get_jsonld_property(jsonld, "http://schema.org/schemaVersion", "1") -def _get_jsonld_property(jsonld, property, default=None): +def _get_jsonld_property(jsonld, property_name, default=None): """Return property value from expanded JSON-LD data.""" - value = jsonld.get(property) + value = jsonld.get(property_name) if not value: return default if isinstance(value, list) and len(value) == 1 and isinstance(value[0], dict) and "@value" in value[0]: @@ -248,11 +216,9 @@ def normalize(value): return value -def get_datasets_path(client): +def get_datasets_path(): """Get the old datasets metadata path.""" - return getattr( - thread_local_storage, "temporary_datasets_path", project_context.path / RENKU_HOME / OLD_DATASETS_PATH - ) + return getattr(thread_local_storage, "temporary_datasets_path", project_context.metadata_path / OLD_DATASETS_PATH) def set_temporary_datasets_path(temporary_datasets_path): diff --git a/renku/core/session/docker.py b/renku/core/session/docker.py index 5aa3f4f4a7..93fcc1d589 100644 --- a/renku/core/session/docker.py +++ b/renku/core/session/docker.py @@ -25,7 +25,6 @@ from renku.core import errors from renku.core.config import get_value -from renku.core.management.client import LocalClient from renku.core.plugin import hookimpl from renku.core.util import communication from renku.domain_model.project_context import project_context @@ -123,7 +122,6 @@ 
def session_start( image_name: str, project_name: str, config: Optional[Dict[str, Any]], - client: LocalClient, cpu_request: Optional[float] = None, mem_request: Optional[str] = None, disk_request: Optional[str] = None, diff --git a/renku/core/session/renkulab.py b/renku/core/session/renkulab.py index edb1ae2a30..4850651a59 100644 --- a/renku/core/session/renkulab.py +++ b/renku/core/session/renkulab.py @@ -24,7 +24,6 @@ from renku.core import errors from renku.core.config import get_value, set_value -from renku.core.management.client import LocalClient from renku.core.plugin import hookimpl from renku.core.session.utils import get_renku_project_name, get_renku_url from renku.core.util import communication, requests @@ -274,7 +273,6 @@ def session_start( image_name: str, project_name: str, config: Optional[Dict[str, Any]], - client: LocalClient, cpu_request: Optional[float] = None, mem_request: Optional[str] = None, disk_request: Optional[str] = None, diff --git a/renku/core/session/session.py b/renku/core/session/session.py index 25186f0ecd..9c02b0b53d 100644 --- a/renku/core/session/session.py +++ b/renku/core/session/session.py @@ -21,10 +21,8 @@ from itertools import chain from typing import List, Optional -from renku.command.command_builder import inject from renku.core import errors from renku.core.config import get_value -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.plugin.session import get_supported_session_providers from renku.core.session.utils import get_image_repository_host, get_renku_project_name from renku.core.util import communication @@ -58,11 +56,9 @@ def list_sessions(session_provider: ISessionProvider) -> List[Session]: return list(chain(*map(list_sessions, providers))) -@inject.autoparams("client_dispatcher") def session_start( provider: str, - config_path: str, - client_dispatcher: IClientDispatcher, + config_path: Optional[str], image_name: str = None, cpu_request: Optional[float] = None, mem_request: Optional[str] = None, @@ -72,8 +68,6 @@ def session_start( """Start interactive session.""" from renku.domain_model.project_context import project_context - client = client_dispatcher.current_client - pinned_image = get_value("interactive", "image") if pinned_image and image_name is None: image_name = pinned_image @@ -114,7 +108,6 @@ def session_start( config=config, project_name=project_name, image_name=image_name, - client=client, cpu_request=cpu_limit, mem_request=mem_limit, disk_request=disk_limit, diff --git a/renku/core/storage.py b/renku/core/storage.py index bf6ffea8d2..e03db9bac7 100644 --- a/renku/core/storage.py +++ b/renku/core/storage.py @@ -284,24 +284,24 @@ def list_unpushed_lfs_paths(repository: "Repository"): @check_external_storage_wrapper def pull_paths_from_storage(repository: "Repository", *paths): """Pull paths from LFS.""" - client_dict = defaultdict(list) + project_dict = defaultdict(list) for path in expand_directories(paths): - _, _, _, path = get_in_submodules(repository, repository.head.commit, path) + sub_repository, _, path = get_in_submodules(repository, repository.head.commit, path) try: absolute_path = Path(path).resolve() relative_path = absolute_path.relative_to(project_context.path) except ValueError: # An external file absolute_path = Path(os.path.abspath(path)) relative_path = absolute_path.relative_to(project_context.path) - client_dict[project_context.path].append(shlex.quote(str(relative_path))) + project_dict[sub_repository.path].append(shlex.quote(str(relative_path))) - for client_path, 
file_paths in client_dict.items(): + for project_path, file_paths in project_dict.items(): result = run_command( _CMD_STORAGE_PULL, *file_paths, separator=",", - cwd=client_path, + cwd=project_path, stdout=PIPE, stderr=STDOUT, universal_newlines=True, @@ -314,7 +314,7 @@ def pull_paths_from_storage(repository: "Repository", *paths): @check_external_storage_wrapper def clean_storage_cache(*paths): """Remove paths from lfs cache.""" - client_dict = defaultdict(list) + project_dict = defaultdict(list) repositories = {} tracked_paths = {} unpushed_paths = {} @@ -324,9 +324,7 @@ def clean_storage_cache(*paths): repository = project_context.repository for path in expand_directories(paths): - _, current_repository, _, path = get_in_submodules( - repository=repository, commit=repository.head.commit, path=path - ) + current_repository, _, path = get_in_submodules(repository=repository, commit=repository.head.commit, path=path) try: absolute_path = Path(path).resolve() relative_path = absolute_path.relative_to(project_context.path) @@ -346,11 +344,11 @@ def clean_storage_cache(*paths): elif absolute_path not in tracked_paths[project_context.path]: untracked_paths.append(str(relative_path)) else: - client_dict[project_context.path].append(str(relative_path)) + project_dict[project_context.path].append(str(relative_path)) repositories[project_context.path] = current_repository - for client_path, paths in client_dict.items(): - current_repository = repositories[client_path] + for project_path, paths in project_dict.items(): + current_repository = repositories[project_path] for path in paths: with open(path, "r") as tracked_file: @@ -365,7 +363,9 @@ def clean_storage_cache(*paths): with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8", delete=False) as tmp, open( path, "r+t" ) as input_file: - result = run(_CMD_STORAGE_CLEAN, cwd=client_path, stdin=input_file, stdout=tmp, universal_newlines=True) + result = run( + _CMD_STORAGE_CLEAN, cwd=project_path, stdin=input_file, stdout=tmp, universal_newlines=True + ) if result.returncode != 0: raise errors.GitLFSError(f"Error executing 'git lfs clean: \n {result.stdout}") diff --git a/renku/core/template/template.py b/renku/core/template/template.py index 1af411e6e2..c8ec29a08c 100644 --- a/renku/core/template/template.py +++ b/renku/core/template/template.py @@ -117,12 +117,12 @@ def has_template_checksum() -> bool: return os.path.exists(project_context.template_checksums_path) -def copy_template_to_client( +def copy_template_to_project( rendered_template: RenderedTemplate, project: "Project", actions: Dict[str, FileAction], cleanup=True ): """Update project files and metadata from a template.""" - def copy_template_metadata_to_client(): + def copy_template_metadata_to_project(): """Update template-related metadata in a project.""" write_template_checksum(rendered_template.checksums) @@ -171,7 +171,7 @@ def copy_template_metadata_to_client(): raise errors.TemplateUpdateError(f"Cannot write to '{destination}'") from e - copy_template_metadata_to_client() + copy_template_metadata_to_project() def get_sorted_actions(actions: Dict[str, FileAction]) -> Dict[str, FileAction]: diff --git a/renku/core/template/usecase.py b/renku/core/template/usecase.py index da340deac7..0d2ed1d474 100644 --- a/renku/core/template/usecase.py +++ b/renku/core/template/usecase.py @@ -27,14 +27,13 @@ from renku.command.command_builder.command import inject from renku.command.view_model.template import TemplateChangeViewModel, TemplateViewModel from renku.core import errors -from 
renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.interface.project_gateway import IProjectGateway -from renku.core.management.migrate import is_renku_project +from renku.core.migration.migrate import is_renku_project from renku.core.template.template import ( FileAction, RepositoryTemplates, TemplateAction, - copy_template_to_client, + copy_template_to_project, fetch_templates_source, get_file_actions, has_template_checksum, @@ -87,12 +86,8 @@ def check_for_template_update(project: Optional[Project]) -> Tuple[bool, bool, O return update_available, metadata.allow_update, metadata.reference, latest_reference -@inject.autoparams("client_dispatcher") -def set_template( - source, reference, id, force, interactive, input_parameters, dry_run, client_dispatcher: IClientDispatcher -) -> TemplateChangeViewModel: +def set_template(source, reference, id, force, interactive, input_parameters, dry_run) -> TemplateChangeViewModel: """Set template for a project.""" - client = client_dispatcher.current_client project = project_context.project if project.template_source and not force: @@ -113,19 +108,13 @@ def set_template( dry_run=dry_run, template_action=TemplateAction.SET, input_parameters=input_parameters, - client=client, ) return TemplateChangeViewModel.from_template(template=rendered_template, actions=actions) -@inject.autoparams("client_dispatcher") -def update_template( - force, interactive, dry_run, client_dispatcher: IClientDispatcher -) -> Optional[TemplateChangeViewModel]: +def update_template(force, interactive, dry_run) -> Optional[TemplateChangeViewModel]: """Update project's template if possible. Return True if updated.""" - client = client_dispatcher.current_client - template_metadata = TemplateMetadata.from_project(project=project_context.project) if not template_metadata.source: @@ -164,7 +153,6 @@ def update_template( dry_run=dry_run, template_action=TemplateAction.UPDATE, input_parameters=None, - client=client, ) return TemplateChangeViewModel.from_template(template=rendered_template, actions=actions) @@ -179,7 +167,6 @@ def _set_or_update_project_from_template( dry_run: bool, template_action: TemplateAction, input_parameters, - client, project_gateway: IProjectGateway, ) -> Tuple[RenderedTemplate, Dict[str, FileAction]]: """Update project files and metadata from a template.""" @@ -212,7 +199,7 @@ def _set_or_update_project_from_template( ) if not dry_run: - copy_template_to_client(rendered_template=rendered_template, project=project, actions=actions) + copy_template_to_project(rendered_template=rendered_template, project=project, actions=actions) project_gateway.update_project(project) return rendered_template, actions diff --git a/renku/core/util/contexts.py b/renku/core/util/contexts.py index cd0b5f2787..aea78db8b5 100644 --- a/renku/core/util/contexts.py +++ b/renku/core/util/contexts.py @@ -20,12 +20,14 @@ import contextlib import os import sys +import time from pathlib import Path -from typing import Union - -import click +from typing import Dict, List, Optional, Union +from renku.command.command_builder import inject from renku.core import errors +from renku.core.interface.database_gateway import IDatabaseGateway +from renku.core.interface.project_gateway import IProjectGateway @contextlib.contextmanager @@ -56,7 +58,7 @@ def __enter__(self): setattr(sys, self._stream, self._new_target) return self._new_target - def __exit__(self, exctype, excinst, exctb): + def __exit__(self, exception_type, exception_value, traceback): """Restore the stream 
value.""" setattr(sys, self._stream, self._old_targets.pop()) @@ -106,17 +108,63 @@ def measure(message="TOTAL"): @contextlib.contextmanager -def click_context(path, command): - """Provide a click context with repo path injected.""" - from renku.core.management.client import LocalClient +def renku_project_context(path): + """Provide a project context with repo path injected.""" from renku.core.util.git import get_git_path from renku.domain_model.project_context import project_context - with project_context.with_path(get_git_path(path)) as project_context, click.Context( - click.Command(command), obj=LocalClient() - ).scope() as ctx: + path = get_git_path(path) + + with project_context.with_path(path=path), chdir(path): project_context.external_storage_requested = True - yield project_context.path, ctx + yield project_context.path + + +@contextlib.contextmanager +@inject.autoparams("project_gateway", "database_gateway") +def with_project_metadata( + project_gateway: IProjectGateway, + database_gateway: IDatabaseGateway, + read_only: bool = False, + name: Optional[str] = None, + namespace: Optional[str] = None, + description: Optional[str] = None, + keywords: Optional[List[str]] = None, + custom_metadata: Optional[Dict] = None, +): + """Yield an editable metadata object. + + Args: + project_gateway(IProjectGateway): Injected project gateway. + database_gateway(IDatabaseGateway): Injected database gateway. + read_only(bool): Whether to save changes or not (Default value = False). + name(Optional[str]): Name of the project (when creating a new one) (Default value = None). + namespace(Optional[str]): Namespace of the project (when creating a new one) (Default value = None). + description(Optional[str]): Project description (when creating a new one) (Default value = None). + keywords(Optional[List[str]]): Keywords for the project (when creating a new one) (Default value = None). + custom_metadata(Optional[Dict]): Custom JSON-LD metadata (when creating a new project) + (Default value = None). 
+ """ + from renku.domain_model.project import Project + from renku.domain_model.project_context import project_context + + try: + project = project_gateway.get_project() + except ValueError: + project = Project.from_project_context( + project_context=project_context, + name=name, + namespace=namespace, + description=description, + keywords=keywords, + custom_metadata=custom_metadata, + ) + + yield project + + if not read_only: + project_gateway.update_project(project) + database_gateway.commit() @contextlib.contextmanager @@ -139,3 +187,15 @@ def Lock(filename: Union[Path, str], timeout: int = 0, mode: str = "shared", blo yield except (portalocker.LockException, portalocker.AlreadyLocked) as e: raise errors.LockError(f"Cannot lock {e.__class__.__name__}") + + +@contextlib.contextmanager +def wait_for(delay: float): + """Make sure that at least ``delay`` seconds are passed during the execution of the wrapped code block.""" + start = time.time() + + yield + + exec_time = time.time() - start + if exec_time < delay: + time.sleep(delay - exec_time) diff --git a/renku/core/util/dispatcher.py b/renku/core/util/dispatcher.py index dcfa10e1a4..21edd1bc52 100644 --- a/renku/core/util/dispatcher.py +++ b/renku/core/util/dispatcher.py @@ -20,23 +20,11 @@ from typing import TYPE_CHECKING from renku.command.command_builder.command import inject -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.interface.storage import IStorageFactory if TYPE_CHECKING: from renku.core.dataset.providers.api import ProviderApi, ProviderCredentials from renku.core.interface.storage import IStorage - from renku.core.management.client import LocalClient - - -def get_client() -> "LocalClient": - """Return current client.""" - - @inject.autoparams() - def get_client_helper(client_dispatcher: IClientDispatcher): - return client_dispatcher.current_client - - return get_client_helper() def get_storage(provider: "ProviderApi", credentials: "ProviderCredentials") -> "IStorage": diff --git a/renku/core/util/git.py b/renku/core/util/git.py index 57d6988b85..b115c86058 100644 --- a/renku/core/util/git.py +++ b/renku/core/util/git.py @@ -17,10 +17,13 @@ # limitations under the License. """Git utility functions.""" +import contextlib import os import pathlib import re import shutil +import sys +import time import urllib from functools import reduce from pathlib import Path @@ -31,13 +34,15 @@ from renku.core import errors if TYPE_CHECKING: - from renku.core.management.client import LocalClient from renku.domain_model.entity import Collection, Entity from renku.domain_model.git import GitURL from renku.domain_model.provenance.agent import Person, SoftwareAgent from renku.infrastructure.repository import Commit, Remote, Repository +COMMIT_DIFF_STRATEGY = "DIFF" +STARTED_AT = int(time.time() * 1e3) + BRANCH_NAME_LIMIT = 250 CLI_GITLAB_ENDPOINT = "repos" PROTECTED_BRANCH_PREFIX = "renku/autobranch" @@ -130,11 +135,10 @@ def get_oauth_url(url, gitlab_token): return parsed_url._replace(netloc=netloc).geturl() -def get_cache_directory_for_repository(client, url) -> Path: - """Return a path to client's cache directory. +def get_cache_directory_for_repository(url) -> Path: + """Return a path to project's cache directory. Args: - client: ``LocalCLient``. url: The repository URL. 
Returns: @@ -736,7 +740,7 @@ def clean_directory(): except PermissionError as e: raise errors.InvalidFileOperation(f"Cannot delete files in {path}: Permission denied") from e - def check_and_reuse_existing_repository() -> Optional[Repository]: + def check_and_reuse_existing_repository() -> Optional["Repository"]: if path is None or not cast(Path, path).exists(): return None @@ -939,16 +943,11 @@ def shorten_message(message: str, line_length: int = 100, body_length: int = 650 def get_in_submodules( repository: "Repository", commit: "Commit", path: Union[Path, str] -) -> Tuple["LocalClient", "Repository", "Commit", Path]: +) -> Tuple["Repository", "Commit", Path]: """Resolve filename in submodules.""" - from renku.core.management.client import LocalClient - from renku.domain_model.project_context import project_context - original_path = repository.path / path in_vendor = str(path).startswith(".renku/vendors") - client = LocalClient() - if original_path.is_symlink() or in_vendor: resolved_path = original_path.resolve() @@ -959,14 +958,12 @@ def get_in_submodules( try: path_within_submodule = resolved_path.relative_to(submodule.path) commit = submodule.get_previous_commit(path=path_within_submodule, revision=commit.hexsha) - with project_context.with_path(submodule.path): - subclient = LocalClient() except (ValueError, errors.GitCommitNotFoundError): pass else: - return subclient, submodule, commit, path_within_submodule + return submodule, commit, path_within_submodule - return client, repository, commit, Path(path) + return repository, commit, Path(path) def get_dirty_paths(repository: "Repository") -> Set[str]: @@ -975,3 +972,149 @@ def get_dirty_paths(repository: "Repository") -> Set[str]: staged_files = [d.a_path for d in repository.staged_changes] if repository.head.is_valid() else [] return {os.path.join(repository.path, p) for p in repository.untracked_files + modified_files + staged_files} + + +@contextlib.contextmanager +def with_commit( + *, + repository: "Repository", + transaction_id: str, + commit_only=None, + commit_empty=True, + raise_if_empty=False, + commit_message=None, + abbreviate_message=True, + skip_dirty_checks=False, +): + """Automatic commit.""" + diff_before = prepare_commit(repository=repository, commit_only=commit_only, skip_dirty_checks=skip_dirty_checks) + + yield + + finalize_commit( + diff_before=diff_before, + repository=repository, + transaction_id=transaction_id, + commit_only=commit_only, + commit_empty=commit_empty, + raise_if_empty=raise_if_empty, + commit_message=commit_message, + abbreviate_message=abbreviate_message, + ) + + +def prepare_commit(*, repository: "Repository", commit_only=None, skip_dirty_checks=False, skip_staging: bool = False): + """Gather information about repo needed for committing later on.""" + + def ensure_not_untracked(path): + """Ensure that path is not part of git untracked files.""" + for file_path in repository.untracked_files: + is_parent = (repository.path / file_path).parent == (repository.path / path) + is_equal = str(path) == file_path + + if is_parent or is_equal: + raise errors.DirtyRenkuDirectory(repository) + + def ensure_not_staged(path): + """Ensure that path is not part of git staged files.""" + path = str(path) + for file_path in repository.staged_changes: + is_parent = str(file_path.a_path).startswith(path) + is_equal = path == file_path.a_path + + if is_parent or is_equal: + raise errors.DirtyRenkuDirectory(repository) + + if skip_staging: + if not isinstance(commit_only, list) or len(commit_only) == 0: + raise 
errors.OperationError("Cannot use ``skip_staging`` without specifying files to commit.") + + diff_before = set() + + if commit_only == COMMIT_DIFF_STRATEGY: + if len(repository.staged_changes) > 0 or len(repository.unstaged_changes) > 0: + repository.reset() + + # Exclude files created by pipes. + diff_before = { + file for file in repository.untracked_files if STARTED_AT - int(Path(file).stat().st_ctime * 1e3) >= 1e3 + } + + if isinstance(commit_only, list) and not skip_dirty_checks: + for path in commit_only: + ensure_not_untracked(path) + ensure_not_staged(path) + + return diff_before + + +def finalize_commit( + *, + diff_before, + repository: "Repository", + transaction_id: str, + commit_only=None, + commit_empty=True, + raise_if_empty=False, + commit_message=None, + abbreviate_message=True, + skip_staging: bool = False, +): + """Commit modified/added paths.""" + from renku.core.util.urls import remove_credentials + from renku.infrastructure.repository import Actor + from renku.version import __version__, version_url + + committer = Actor(name=f"renku {__version__}", email=version_url) + + change_types = {item.a_path: item.change_type for item in repository.unstaged_changes} + + if commit_only == COMMIT_DIFF_STRATEGY: + # Get diff generated in command. + staged_after = set(change_types.keys()) + + modified_after_change_types = {item.a_path: item.change_type for item in repository.staged_changes} + + modified_after = set(modified_after_change_types.keys()) + + change_types.update(modified_after_change_types) + + diff_after = set(repository.untracked_files).union(staged_after).union(modified_after) + + # Remove files not touched in command. + commit_only = list(diff_after - diff_before) + + if isinstance(commit_only, list): + for path_ in commit_only: + p = repository.path / path_ + if p.exists() or change_types.get(str(path_)) == "D": + repository.add(path_) + + if not commit_only: + repository.add(all=True) + + try: + diffs = [d.a_path for d in repository.staged_changes] + except errors.GitError: + diffs = [] + + if not commit_empty and not diffs: + if raise_if_empty: + raise errors.NothingToCommit() + return + + if commit_message and not isinstance(commit_message, str): + raise errors.CommitMessageEmpty() + + elif not commit_message: + argv = [os.path.basename(sys.argv[0])] + [remove_credentials(arg) for arg in sys.argv[1:]] + + commit_message = " ".join(argv) + + if abbreviate_message: + commit_message = shorten_message(commit_message) + + # NOTE: Only commit specified paths when skipping staging area + paths = commit_only if skip_staging else [] + # Ignore pre-commit hooks since we have already done everything. 
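``prepare_commit`` and ``finalize_commit`` back the ``with_commit`` context manager defined earlier in this file. A hedged usage sketch (the transaction id and file name are illustrative):

    from renku.core.util.git import with_commit
    from renku.domain_model.project_context import project_context

    with with_commit(
        repository=project_context.repository,
        transaction_id="\n\nrenku-transaction: 0123abcd",  # hypothetical id
        commit_message="renku: update generated files",
    ):
        (project_context.path / "results.txt").write_text("done")
    # On exit the touched files are staged and committed, with the transaction
    # id appended to the message and pre-commit hooks skipped.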
+ repository.commit(commit_message + transaction_id, committer=committer, no_verify=True, paths=paths) diff --git a/renku/core/util/metadata.py b/renku/core/util/metadata.py index d0c57e303e..6ee9b21653 100644 --- a/renku/core/util/metadata.py +++ b/renku/core/util/metadata.py @@ -91,12 +91,12 @@ def construct_creator(creator: Union[dict, str], ignore_email) -> Tuple[Optional return person, no_email_warning -def is_external_file(path: Union[Path, str], client_path: Path): +def is_external_file(path: Union[Path, str], project_path: Path): """Checks if a path is an external file.""" from renku.core.constant import POINTERS, RENKU_HOME - path = client_path / path - if not path.is_symlink() or not is_subpath(path=path, base=client_path): + path = project_path / path + if not path.is_symlink() or not is_subpath(path=path, base=project_path): return False pointer = os.readlink(path) @@ -127,9 +127,9 @@ def read_renku_version_from_dockerfile(path: Union[Path, str]) -> Optional[str]: return None -def make_project_temp_dir(client_path: Path) -> Path: +def make_project_temp_dir(project_path: Path) -> Path: """Create a temporary directory inside project's temp path.""" - base = client_path / RENKU_HOME / RENKU_TMP + base = project_path / RENKU_HOME / RENKU_TMP base.mkdir(parents=True, exist_ok=True) return Path(tempfile.mkdtemp(dir=base)) diff --git a/renku/core/workflow/activity.py b/renku/core/workflow/activity.py index 6e4886e0d0..01cb229d53 100644 --- a/renku/core/workflow/activity.py +++ b/renku/core/workflow/activity.py @@ -253,7 +253,7 @@ def get_downstream_generating_activities( starting_activities: Set[Activity], paths: List[str], ignore_deleted: bool, - client_path: Path, + project_path: Path, activity_gateway: IActivityGateway, ) -> List[Activity]: """Return activities downstream of passed activities that generate at least a path in ``paths``. @@ -262,7 +262,7 @@ def get_downstream_generating_activities( starting_activities(Set[Activity]): Activities to use as starting/upstream nodes. paths(List[str]): Optional generated paths to end downstream chains at. ignore_deleted(bool): Whether to ignore deleted generations. - client_path(Path): Path to project's root directory. + project_path(Path): Path to project's root directory. activity_gateway(IActivityGateway): The injected Activity gateway. Returns: @@ -283,7 +283,7 @@ def does_activity_generate_any_paths(activity) -> bool: def has_an_existing_generation(activity) -> bool: for generation in activity.generations: - if (client_path / generation.entity.path).exists(): + if (project_path / generation.entity.path).exists(): return True return False diff --git a/renku/core/workflow/execute.py b/renku/core/workflow/execute.py index 0c7a1671e1..90c78dda7f 100644 --- a/renku/core/workflow/execute.py +++ b/renku/core/workflow/execute.py @@ -89,7 +89,12 @@ def execute_workflow_graph( for plan in dag.nodes: # NOTE: Update plans are copies of Plan objects. We need to use the original Plan objects to avoid duplicates. 
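Call sites pass the renamed ``project_path`` from the global context, as the ``run.py`` hunk further below shows. A condensed sketch with hypothetical inputs (``activity_gateway`` is injected):

    from renku.core.workflow.activity import get_downstream_generating_activities
    from renku.domain_model.project_context import project_context

    downstream = get_downstream_generating_activities(
        starting_activities={start_activity},  # hypothetical Activity instance
        paths=["results/output.csv"],          # hypothetical generated path
        ignore_deleted=True,
        project_path=project_context.path,     # previously ``client_path``
    )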
original_plan = plan_gateway.get_by_id(plan.id) - activity = Activity.from_plan(plan=plan, started_at_time=started_at_time, ended_at_time=ended_at_time) + activity = Activity.from_plan( + plan=plan, + repository=project_context.repository, + started_at_time=started_at_time, + ended_at_time=ended_at_time, + ) activity.association.plan = original_plan activity_gateway.add(activity) activities.append(activity) diff --git a/renku/core/workflow/plan.py b/renku/core/workflow/plan.py index 8afa973b49..a04d0ae1ec 100644 --- a/renku/core/workflow/plan.py +++ b/renku/core/workflow/plan.py @@ -29,7 +29,6 @@ from renku.command.view_model.plan import plan_view from renku.core import errors from renku.core.interface.activity_gateway import IActivityGateway -from renku.core.interface.client_dispatcher import IClientDispatcher from renku.core.interface.plan_gateway import IPlanGateway from renku.core.interface.project_gateway import IProjectGateway from renku.core.util import communication @@ -285,7 +284,6 @@ def compose_workflow( activity_gateway: IActivityGateway, plan_gateway: IPlanGateway, project_gateway: IProjectGateway, - client_dispatcher: IClientDispatcher, ) -> CompositePlan: """Compose workflows into a CompositePlan. @@ -307,7 +305,6 @@ def compose_workflow( activity_gateway(IActivityGateway): Injected activity gateway. plan_gateway(IPlanGateway): Injected plan gateway. project_gateway(IProjectGateway): Injected project gateway. - client_dispatcher(IClientDispatcher): Injected client dispatcher. Returns: The newly created ``CompositePlan``. @@ -439,7 +436,6 @@ def export_workflow( Args: name_or_id: name or id of the Plan to export - client_dispatcher(IClientDispatcher): Injected client dispatcher. plan_gateway(IPlanGateway): The injected Plan gateway. format(str): Format to export to. output(Optional[str]): Output path to store result at. @@ -516,7 +512,6 @@ def visualize_graph( sources: List[str], targets: List[str], show_files: bool, - client_dispatcher: IClientDispatcher, activity_gateway: IActivityGateway, revision: Optional[str] = None, ): @@ -526,7 +521,6 @@ def visualize_graph( sources(List[str]): Input paths to start the visualized graph at. targets(List[str]): Output paths to end the visualized graph at. show_files(bool): Whether or not to show file nodes. - client_dispatcher(IClientDispatcher): The client dispatcher. activity_gateway(IActivityGateway): The injected activity gateway. 
revision(Optional[str], optional): Revision or revision range to show the graph for (Default value = None) diff --git a/renku/core/workflow/plan_factory.py b/renku/core/workflow/plan_factory.py index b9e14e35ee..a973ee5d59 100644 --- a/renku/core/workflow/plan_factory.py +++ b/renku/core/workflow/plan_factory.py @@ -24,7 +24,7 @@ from contextlib import contextmanager from itertools import chain from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union, cast import click import yaml @@ -62,11 +62,11 @@ class PlanFactory: def __init__( self, - command_line: str, + command_line: Union[str, List[str], Tuple[str, ...]], explicit_inputs: Optional[List[Tuple[str, str]]] = None, explicit_outputs: Optional[List[Tuple[str, str]]] = None, explicit_parameters: Optional[List[Tuple[str, Optional[str]]]] = None, - directory: Optional[str] = None, + directory: Optional[Union[Path, str]] = None, working_dir: Optional[Union[Path, str]] = None, no_input_detection: bool = False, no_output_detection: bool = False, @@ -161,7 +161,7 @@ def _resolve_existing_subpath(self, candidate: Union[Path, str]) -> Optional[Pat # (e.g. /bin/bash) if is_subpath(path, base=self.working_dir): return path - elif is_external_file(path=candidate, client_path=self.working_dir): + elif is_external_file(path=candidate, project_path=self.working_dir): return Path(os.path.abspath(candidate)) return None @@ -257,7 +257,7 @@ def add_inputs_and_parameters(self, *arguments): assert isinstance(default, File) self.add_command_input(default_value=str(default), encoding_format=default.mime_type, position=position) - def add_outputs(self, candidates: Set[Tuple[Union[Path, str], Optional[str]]]): + def add_outputs(self, candidates: Iterable[Tuple[Union[Path, str], Optional[str]]]): """Yield detected output and changed command input parameter.""" # TODO what to do with duplicate paths & inputs with same defaults candidate_paths = list(map(lambda x: x[0], candidates)) @@ -767,28 +767,28 @@ def delete_indirect_files_list(working_dir): pass -def get_indirect_inputs_path(client_path): +def get_indirect_inputs_path(project_path): """Return path to file that contains indirect inputs list.""" - parent = _get_indirect_parent_path(client_path) + parent = _get_indirect_parent_path(project_path) return parent / "inputs.yml" -def get_indirect_outputs_path(client_path): +def get_indirect_outputs_path(project_path): """Return path to file that contains indirect outputs list.""" - parent = _get_indirect_parent_path(client_path) + parent = _get_indirect_parent_path(project_path) return parent / "outputs.yml" -def get_indirect_parameters_path(client_path): +def get_indirect_parameters_path(project_path): """Return path to file that contains indirect parameters list.""" - parent = _get_indirect_parent_path(client_path) + parent = _get_indirect_parent_path(project_path) return parent / "parameters.yml" -def _get_indirect_parent_path(client_path): +def _get_indirect_parent_path(project_path): renku_indirect_path = os.getenv("RENKU_INDIRECT_PATH") or "" - base = (Path(client_path) / RENKU_HOME / RENKU_TMP).resolve() + base = (Path(project_path) / RENKU_HOME / RENKU_TMP).resolve() parent = (base / renku_indirect_path).resolve() try: diff --git a/renku/core/workflow/providers/cwltool.py b/renku/core/workflow/providers/cwltool.py index 472cb6736c..f7c9ed60d8 100644 --- a/renku/core/workflow/providers/cwltool.py +++ b/renku/core/workflow/providers/cwltool.py 
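A hedged sketch of the renamed indirect-file helpers in ``plan_factory.py``: the project root is now passed as ``project_path`` and the on-disk layout is unchanged (the ``RENKU_INDIRECT_PATH`` value is made up):

    import os

    from renku.core.workflow.plan_factory import get_indirect_inputs_path, get_indirect_outputs_path
    from renku.domain_model.project_context import project_context

    os.environ["RENKU_INDIRECT_PATH"] = "run-0"  # hypothetical sub-directory
    print(get_indirect_inputs_path(project_context.path))   # <root>/.renku/tmp/run-0/inputs.yml
    print(get_indirect_outputs_path(project_context.path))  # <root>/.renku/tmp/run-0/outputs.yml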
@@ -51,7 +51,7 @@ import networkx as nx from cwltool.context import LoadingContext, RuntimeContext -from renku.command.echo import progressbar +from renku.command.util import progressbar from renku.core.errors import NodeNotFoundError, WorkflowExecuteError from renku.core.plugin import hookimpl from renku.core.plugin.workflow import workflow_converter diff --git a/renku/core/workflow/providers/toil.py b/renku/core/workflow/providers/toil.py index c82abc92bf..c8a017b5b7 100644 --- a/renku/core/workflow/providers/toil.py +++ b/renku/core/workflow/providers/toil.py @@ -35,7 +35,7 @@ from toil.leader import FailedJobsException from toil.lib.docker import apiDockerCall -from renku.command.echo import progressbar +from renku.command.util import progressbar from renku.core import errors from renku.core.constant import RENKU_HOME, RENKU_TMP from renku.core.errors import WorkflowExecuteError diff --git a/renku/core/workflow/run.py b/renku/core/workflow/run.py index 5f723aa90c..c9d147540c 100644 --- a/renku/core/workflow/run.py +++ b/renku/core/workflow/run.py @@ -91,7 +91,7 @@ def mark_generations_as_stale(activity): starting_activities={start_activity}, paths=generation_paths, ignore_deleted=ignore_deleted, - client_path=project_context.path, + project_path=project_context.path, ) if activities: modified_inputs.add(usage_path) diff --git a/renku/domain_model/__init__.py b/renku/domain_model/__init__.py index a69da15a75..496a0004c8 100644 --- a/renku/domain_model/__init__.py +++ b/renku/domain_model/__init__.py @@ -16,6 +16,3 @@ # See the License for the specific language governing permissions and # limitations under the License. """Model objects used in Python SDK.""" -from .datastructures import Collection, Model - -__all__ = ("Collection", "Model") diff --git a/renku/domain_model/dataset.py b/renku/domain_model/dataset.py index aef039ac05..43679bef4e 100644 --- a/renku/domain_model/dataset.py +++ b/renku/domain_model/dataset.py @@ -259,9 +259,7 @@ def __init__( self.source: Optional[str] = str(source) @classmethod - def from_path( - cls, client, path: Union[str, Path], source=None, based_on: Optional[RemoteEntity] = None - ) -> "DatasetFile": + def from_path(cls, path: Union[str, Path], source=None, based_on: Optional[RemoteEntity] = None) -> "DatasetFile": """Return an instance from a path.""" from renku.domain_model.entity import NON_EXISTING_ENTITY_CHECKSUM, Entity @@ -273,7 +271,7 @@ def from_path( else: entity = get_entity_from_revision(repository=project_context.repository, path=path, bypass_cache=True) - is_external = is_external_file(path=path, client_path=project_context.path) + is_external = is_external_file(path=path, project_path=project_context.path) return cls(entity=entity, is_external=is_external, source=source, based_on=based_on) @staticmethod diff --git a/renku/domain_model/datastructures.py b/renku/domain_model/datastructures.py index ef7abed097..2053a61a33 100644 --- a/renku/domain_model/datastructures.py +++ b/renku/domain_model/datastructures.py @@ -22,154 +22,6 @@ from pathlib import Path -class Model(object): - """Abstract response of a single object.""" - - IDENTIFIER_KEY = "identifier" - - def __init__(self, response=None, client=None, collection=None): - """Create a representation of an object on the server.""" - self._response = response if response is not None else {} - self._client = client - self._collection = collection - - @property - def id(self): - """The identifier of the object.""" - return self._response[self.IDENTIFIER_KEY] - - def __str__(self): - 
"""Format model.""" - return "<{0.__class__.__name__} '{0.id!s}'>".format(self) - - __repr__ = __str__ - - -class Collection(object): - """Abstract response of multiple objects.""" - - class Meta: - """Store information about the model.""" - - model = None - """Define the type of object this collection represents.""" - - headers = "id" - """Which fields to use as headers when printing the collection.""" - - def __init__(self, client=None): - """Create a representation of objects on the server.""" - self._client = client - - def list(self): - """Return a list if the collection is iterable.""" - if not hasattr(self, "__iter__"): - raise NotImplementedError("The collection is not iterable.") - return list(self) - - -class LazyResponse(dict): - """Lazy load object properties.""" - - def __init__(self, getter, *args, **kwargs): - """Initialize LazyRequest.""" - self._getter = getter - self._called = False - super(LazyResponse, self).__init__(*args, **kwargs) - - def __getitem__(self, key): - """Implement KeyError check.""" - try: - return dict.__getitem__(self, key) - except KeyError: - if not self._called: - self.update(**self._getter()) - self._called = True - return dict.__getitem__(self, key) - raise - - -class IndexedList(list): - """List allowing to query items by id or by named index. - - Example: - >>> from collections import namedtuple - >>> Item = namedtuple('Item', 'key, value') - >>> items = IndexedList(Item('a', 1), Item('b', 2), attr='key') - >>> items[0].value - 1 - >>> items['a'].value - 1 - >>> items.b.value - 2 - >>> items[0] in items - True - >>> 'a' in items - True - >>> 'c' not in items - True - - The attribute name must be always defined. - - >>> IndexedList() - Traceback (most recent call last): - ... - ValueError: The attribute name must be defined. - - """ - - __slots__ = ("_attr_name", "_prefix") - - def __new__(cls, *args, attr=None, prefix=""): - """Call list constructor.""" - return super().__new__(cls) - - def __init__(self, *args, attr=None, prefix=""): - """Store index information.""" - if attr is None: - raise ValueError("The attribute name must be defined.") - - self._attr_name = attr - self._prefix = prefix - - self.extend(args) - - def __contains__(self, attr): - """Check existence of attribute value or object itself.""" - #: Check if the instance is in the list. - rval = list.__contains__(self, attr) - if rval: - return rval - - #: Find item by attribute value. - try: - getattr(self, attr) - return True - except (AttributeError, TypeError): - return False - - def __getattr__(self, attr): - """Find item by named index.""" - attr_name = self._prefix + attr - for item in self: - #: Find object by attribute value. - if getattr(item, self._attr_name) == attr_name: - return item - - #: Return instance attrubutes. - return list.__getattribute__(self, attr) - - def __getitem__(self, index): - """Find item by named index.""" - if isinstance(index, int): - return list.__getitem__(self, index) - - try: - return getattr(self, index) - except AttributeError: - raise IndexError("No item found with id {0}".format(self._prefix + index)) - - class DirectoryTree(dict): r"""Create a safe directory tree from paths. 
diff --git a/renku/domain_model/project.py b/renku/domain_model/project.py index c8cb726d33..e8c720113c 100644 --- a/renku/domain_model/project.py +++ b/renku/domain_model/project.py @@ -18,7 +18,7 @@ """Project class.""" from datetime import datetime -from typing import Dict, List, Optional, cast +from typing import TYPE_CHECKING, Dict, List, Optional, cast from urllib.parse import quote import persistent @@ -28,11 +28,14 @@ from renku.core.util.git import get_git_user from renku.core.util.os import normalize_to_ascii from renku.core.util.util import NO_VALUE -from renku.domain_model.project_context import project_context from renku.domain_model.provenance.agent import Person from renku.domain_model.provenance.annotation import Annotation from renku.version import __minimum_project_version__ +if TYPE_CHECKING: + from renku.domain_model.project_context import ProjectContext, ProjectRemote + from renku.infrastructure.repository import Repository + class Project(persistent.Persistent): """Represent a project.""" @@ -63,7 +66,7 @@ def __init__( version: Optional[str] = None, keywords: Optional[List[str]] = None, ): - from renku.core.management.migrate import SUPPORTED_PROJECT_VERSION + from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION version = cast(str, version or SUPPORTED_PROJECT_VERSION) date_created = parse_date(date_created) or local_now() @@ -94,8 +97,9 @@ def __init__( self.minimum_renku_version = Project.minimum_renku_version @classmethod - def from_client( + def from_project_context( cls, + project_context: "ProjectContext", name: Optional[str] = None, namespace: Optional[str] = None, description: Optional[str] = None, @@ -115,10 +119,11 @@ def from_client( (Default value = None). creator(Optional[Person]): The project creator. 
""" + creator = creator or get_git_user(repository=project_context.repository) + namespace, name = cls.get_namespace_and_name( - use_project_context=True, name=name, namespace=namespace, creator=creator + remote=project_context.remote, name=name, namespace=namespace, creator=creator ) - creator = creator or get_git_user(repository=project_context.repository) annotations = None if custom_metadata: @@ -141,19 +146,19 @@ def from_client( @staticmethod def get_namespace_and_name( *, - use_project_context: bool = False, + remote: Optional["ProjectRemote"] = None, + repository: Optional["Repository"] = None, name: Optional[str] = None, namespace: Optional[str] = None, creator: Optional[Person] = None, ): """Return Project's namespace and name from various objects.""" - if use_project_context: - remote = project_context.remote + if remote: namespace = namespace or remote.owner name = name or remote.name - if not creator: - creator = get_git_user(repository=project_context.repository) + if not creator and repository: + creator = get_git_user(repository=repository) if not namespace and creator: namespace = creator.email.split("@")[0] diff --git a/renku/domain_model/project_context.py b/renku/domain_model/project_context.py index dcece73ccc..a85e258267 100644 --- a/renku/domain_model/project_context.py +++ b/renku/domain_model/project_context.py @@ -165,23 +165,20 @@ def project(self) -> "Project": from renku.command.command_builder.command import inject from renku.core.interface.project_gateway import IProjectGateway - if not self._top.project: - project_gateway = inject.instance(IProjectGateway) - self._top.project = project_gateway.get_project() - - return self._top.project + project_gateway = inject.instance(IProjectGateway) + # NOTE: Don't cache the project since it can be updated in the ``ProjectGateway`` + return project_gateway.get_project() @property def remote(self) -> "ProjectRemote": """Return host, owner and name of the remote if it exists.""" + from renku.core.util.git import get_remote + repository = self.repository - remote: Optional["Remote"] - if repository.active_branch and repository.active_branch.remote_branch: - remote = repository.active_branch.remote_branch.remote - elif len(repository.remotes) == 1: - remote = repository.remotes[0] - else: + remote = get_remote(repository=repository) + + if not remote and len(repository.remotes) > 1: remote = repository.remotes.get("origin") return ProjectRemote.from_remote(remote=remote) @@ -211,7 +208,7 @@ def template_checksums_path(self): @property def transaction_id(self) -> str: - """Get a transaction id for the current client to be used for grouping git commits.""" + """Get a transaction id for the current context to be used for grouping git commits.""" if not self._top.transaction_id: self._top.transaction_id = uuid.uuid4().hex @@ -225,6 +222,10 @@ def _top(self) -> "ProjectProperties": raise errors.ConfigurationError("No project context was pushed") + def has_context(self) -> bool: + """Return if at least one context is pushed.""" + return bool(self._context_stack) + def clear(self) -> None: """Remove all contexts and reset the state without committing intermediate changes. 
@@ -276,11 +277,6 @@ def replace_path(self, path: Union[Path, str]): elif self._top.path != path: self._context_stack[-1] = ProjectProperties(path=path) - def reset_project(self) -> None: - """Discard cached project value.""" - if self._context_stack: - self._top.project = None - @contextlib.contextmanager def with_path( self, path: Union[Path, str], save_changes: bool = False @@ -335,7 +331,6 @@ class ProjectProperties: path: Path database: Optional["Database"] = None datadir: Optional[str] = None - project: Optional["Project"] = None repository: Optional["Repository"] = None save_changes: bool = False transaction_id: Optional[str] = None diff --git a/renku/domain_model/provenance/activity.py b/renku/domain_model/provenance/activity.py index 023e777798..f66a395da6 100644 --- a/renku/domain_model/provenance/activity.py +++ b/renku/domain_model/provenance/activity.py @@ -29,13 +29,13 @@ from renku.core.util.datetime8601 import local_now from renku.core.util.git import get_entity_from_revision, get_git_user from renku.domain_model.entity import Collection, Entity -from renku.domain_model.project_context import project_context from renku.domain_model.provenance.agent import Person, SoftwareAgent from renku.domain_model.provenance.annotation import Annotation from renku.domain_model.provenance.parameter import ParameterValue from renku.domain_model.workflow.plan import Plan from renku.infrastructure.database import Persistent from renku.infrastructure.immutable import Immutable +from renku.infrastructure.repository import Repository from renku.version import __version__, version_url @@ -132,6 +132,7 @@ def __init__( def from_plan( cls, plan: Plan, + repository: "Repository", project_gateway: IProjectGateway, started_at_time: datetime, ended_at_time: datetime, @@ -147,7 +148,6 @@ def from_plan( parameter_values = [] activity_id = id or cls.generate_id() - repository = project_context.repository for input in plan.inputs: input_path = input.actual_value diff --git a/renku/domain_model/session.py b/renku/domain_model/session.py index c9a80aae37..70027cfbcd 100644 --- a/renku/domain_model/session.py +++ b/renku/domain_model/session.py @@ -21,10 +21,7 @@ from abc import ABCMeta, abstractmethod from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional - -if TYPE_CHECKING: - from renku.core.management.client import LocalClient +from typing import Any, Dict, List, Optional class Session: @@ -96,7 +93,6 @@ def session_start( image_name: str, project_name: str, config: Optional[Dict[str, Any]], - client: "LocalClient", cpu_request: Optional[float] = None, mem_request: Optional[str] = None, disk_request: Optional[str] = None, @@ -108,7 +104,6 @@ def session_start( image_name(str): Container image name to be used for the interactive session. project_name(str): The project identifier. config(Optional[Dict[str, Any]]): Path to the session provider specific configuration YAML. - client(LocalClient): Renku client. cpu_request(Optional[float]): CPU request for the session. mem_request(Optional[str]): Memory size request for the session. disk_request(Optional[str]): Disk size request for the session. @@ -124,7 +119,7 @@ def session_stop(self, project_name: str, session_name: Optional[str], stop_all: """Stops all or a given interactive session. Args: - client: Renku client. + project_name: Project's name. session_name: The unique id of the interactive session. stop_all: Specifies whether or not to stop all the running interactive sessions. 
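The renku/domain_model/provenance/activity.py hunks above make Activity.from_plan take the repository explicitly instead of reaching into the global project_context; the updated call site at the top of this section already passes repository=project_context.repository. A sketch of a call under the new signature, where plan is assumed to be an existing Plan in scope and project_gateway keeps being resolved through injection, as the unchanged call pattern suggests:

    from renku.core.util.datetime8601 import local_now
    from renku.domain_model.project_context import project_context
    from renku.domain_model.provenance.activity import Activity

    now = local_now()
    activity = Activity.from_plan(
        plan=plan,                               # existing Plan, assumed in scope
        repository=project_context.repository,   # new explicit argument
        started_at_time=now,
        ended_at_time=now,
    )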
diff --git a/renku/ui/api/models/project.py b/renku/ui/api/models/project.py index 4ca769cfa6..0c61909dab 100644 --- a/renku/ui/api/models/project.py +++ b/renku/ui/api/models/project.py @@ -83,8 +83,8 @@ def __enter__(self): return self def __exit__(self, type, value, traceback): - project_context = self._project_contexts.pop() - if project_context is not self: + context = self._project_contexts.pop() + if context is not self: raise RuntimeError("Project context was changed.") @property @@ -108,9 +108,4 @@ def status(self, paths: Optional[List[Union[Path, str]]] = None, ignore_deleted: StatusResult: Status of the project. """ - return ( - get_status_command() - .with_client_path(self._path) - .build() - .execute(paths=paths, ignore_deleted=ignore_deleted) - ).output + return get_status_command().build().execute(paths=paths, ignore_deleted=ignore_deleted).output diff --git a/renku/ui/cli/__init__.py b/renku/ui/cli/__init__.py index db0ce3b9cd..f4cfe8b6e8 100644 --- a/renku/ui/cli/__init__.py +++ b/renku/ui/cli/__init__.py @@ -87,8 +87,8 @@ import yaml from click_plugins import with_plugins -from renku.command.echo import WARNING from renku.command.options import option_external_storage_requested +from renku.command.util import WARNING from renku.command.version import check_version, print_version from renku.core import errors from renku.core.constant import DATABASE_PATH @@ -166,7 +166,7 @@ def _is_renku_project(path: Path) -> bool: yaml.add_representer(uuid.UUID, _uuid_representer) -def print_global_config_path(ctx, param, value): +def print_global_config_path(ctx, _, value): """Print global application's config path.""" if not value or ctx.resilient_parsing: return @@ -230,7 +230,6 @@ def is_allowed_command(ctx): @click.pass_context def cli(ctx, path, external_storage_requested): """Check common Renku commands used in various situations.""" - from renku.core.management.client import LocalClient from renku.domain_model.project_context import project_context path = Path(path) @@ -245,7 +244,6 @@ def cli(ctx, path, external_storage_requested): project_context.push_path(path) project_context.external_storage_requested = external_storage_requested - ctx.obj = LocalClient() if is_renku_project and path != Path(os.getcwd()) and not is_command_allowed: click.secho(WARNING + "Run CLI commands only from project's root directory.\n", err=True) diff --git a/renku/ui/cli/exception_handler.py b/renku/ui/cli/exception_handler.py index e70d3c2bad..325884d136 100644 --- a/renku/ui/cli/exception_handler.py +++ b/renku/ui/cli/exception_handler.py @@ -62,7 +62,7 @@ import click import renku.ui.cli.utils.color as color -from renku.command.echo import ERROR +from renku.command.util import ERROR from renku.core import errors from renku.ui.service.config import SENTRY_ENABLED, SENTRY_SAMPLERATE diff --git a/renku/ui/cli/init.py b/renku/ui/cli/init.py index 406b03b4a9..0d6721e8f2 100644 --- a/renku/ui/cli/init.py +++ b/renku/ui/cli/init.py @@ -308,7 +308,6 @@ def init( communicator = ClickCallback() init_command().with_communicator(communicator).build().execute( - ctx=ctx, external_storage_requested=external_storage_requested, path=path, name=name, diff --git a/renku/ui/cli/migrate.py b/renku/ui/cli/migrate.py index 7585f7ea32..83fff78712 100644 --- a/renku/ui/cli/migrate.py +++ b/renku/ui/cli/migrate.py @@ -64,7 +64,7 @@ import click import renku.ui.cli.utils.color as color -from renku.command.echo import ERROR, INFO +from renku.command.util import ERROR, INFO from renku.core.errors import MigrationRequired, 
ProjectNotSupported from renku.ui.cli.utils.callback import ClickCallback @@ -92,7 +92,7 @@ def migrate(check, skip_template_update, skip_docker_update, strict, preserve_id TEMPLATE_UPDATE_POSSIBLE, UNSUPPORTED_PROJECT, check_project, - migrate_project, + migrate_project_command, ) status = check_project().build().execute().output @@ -131,7 +131,7 @@ def migrate(check, skip_template_update, skip_docker_update, strict, preserve_id communicator = ClickCallback() - command = migrate_project().with_communicator(communicator).with_commit() + command = migrate_project_command().with_communicator(communicator).with_commit() result = command.build().execute( skip_template_update=skip_template_update, skip_docker_update=skip_docker_update, diff --git a/renku/ui/cli/service.py b/renku/ui/cli/service.py index aca48eb480..51c2f8a49e 100644 --- a/renku/ui/cli/service.py +++ b/renku/ui/cli/service.py @@ -29,7 +29,7 @@ import psutil import renku.ui.cli.utils.color as color -from renku.command.echo import ERROR +from renku.command.util import ERROR RENKU_DAEMON_LOG_FILE = "renku.log" RENKU_DAEMON_ERR_FILE = "renku.err" diff --git a/renku/ui/cli/storage.py b/renku/ui/cli/storage.py index 1e7e62a30c..90d8994ce8 100644 --- a/renku/ui/cli/storage.py +++ b/renku/ui/cli/storage.py @@ -96,7 +96,7 @@ import click import renku.ui.cli.utils.color as color -from renku.command.echo import WARNING +from renku.command.util import WARNING from renku.ui.cli.utils.callback import ClickCallback diff --git a/renku/ui/cli/workflow.py b/renku/ui/cli/workflow.py index 2dc59a8114..cd93cd0170 100644 --- a/renku/ui/cli/workflow.py +++ b/renku/ui/cli/workflow.py @@ -728,8 +728,8 @@ from lazy_object_proxy import Proxy import renku.ui.cli.utils.color as color -from renku.command.echo import ERROR from renku.command.format.workflow import WORKFLOW_COLUMNS, WORKFLOW_FORMATS, WORKFLOW_VISUALIZE_FORMATS +from renku.command.util import ERROR from renku.command.view_model.activity_graph import ACTIVITY_GRAPH_COLUMNS from renku.core import errors from renku.ui.cli.utils.callback import ClickCallback diff --git a/renku/ui/service/controllers/api/mixins.py b/renku/ui/service/controllers/api/mixins.py index f5dad52288..89d8f95701 100644 --- a/renku/ui/service/controllers/api/mixins.py +++ b/renku/ui/service/controllers/api/mixins.py @@ -26,7 +26,7 @@ from renku.core.constant import RENKU_HOME from renku.core.errors import GitCommandError, GitConfigurationError, LockError, RenkuException, UninitializedProject -from renku.core.util.contexts import click_context +from renku.core.util.contexts import renku_project_context from renku.infrastructure.repository import Repository from renku.ui.service.cache.config import REDIS_NAMESPACE from renku.ui.service.cache.models.job import Job @@ -311,7 +311,7 @@ def local(self): self.project_path = project.abs_path - with click_context(self.project_path, "renku_op"): + with renku_project_context(self.project_path): return self.renku_op() except (portalocker.LockException, portalocker.AlreadyLocked, LockError) as e: raise IntermittentLockError() from e @@ -328,7 +328,7 @@ def remote(self): if not (self.project_path / RENKU_HOME).exists(): raise UninitializedProject(self.project_path) - with click_context(self.project_path, "renku_op"): + with renku_project_context(self.project_path): return self.renku_op() diff --git a/renku/ui/service/controllers/cache_migrate_project.py b/renku/ui/service/controllers/cache_migrate_project.py index 2033c67cfb..a31fcca1c6 100644 --- 
a/renku/ui/service/controllers/cache_migrate_project.py +++ b/renku/ui/service/controllers/cache_migrate_project.py @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Renku service migrate project controller.""" -from renku.core.util.contexts import click_context +from renku.core.util.contexts import renku_project_context from renku.ui.service.cache.models.job import Job from renku.ui.service.config import MESSAGE_PREFIX, PROJECT_CLONE_NO_DEPTH from renku.ui.service.controllers.api.abstract import ServiceCtrl @@ -32,15 +32,15 @@ def execute_migration( project_path, force_template_update, skip_template_update, skip_docker_update, skip_migrations, commit_message ): """Execute project migrations.""" - from renku.command.migrate import migrate_project + from renku.command.migrate import migrate_project_command worker_log.debug(f"migrating {project_path}") communicator = ServiceCallback() - with click_context(project_path, "execute_migration"): + with renku_project_context(project_path): result = ( - migrate_project() + migrate_project_command() .with_commit(message=commit_message) .with_communicator(communicator) .build() diff --git a/renku/ui/service/controllers/cache_migrations_check.py b/renku/ui/service/controllers/cache_migrations_check.py index 19ee263544..f948dd0723 100644 --- a/renku/ui/service/controllers/cache_migrations_check.py +++ b/renku/ui/service/controllers/cache_migrations_check.py @@ -22,8 +22,8 @@ from renku.command.migrate import migrations_check from renku.core.errors import AuthenticationError, MinimumVersionError, ProjectNotFound, RenkuException -from renku.core.management.migrate import SUPPORTED_PROJECT_VERSION -from renku.core.util.contexts import click_context +from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION +from renku.core.util.contexts import renku_project_context from renku.ui.service.controllers.api.abstract import ServiceCtrl from renku.ui.service.controllers.api.mixins import RenkuOperationMixin from renku.ui.service.interfaces.git_api_provider import IGitAPIProvider @@ -64,7 +64,7 @@ def _fast_op_without_cache(self): ref=self.request_data.get("ref", None), token=self.user_data.get("token", None), ) - with click_context(tempdir, "renku_op"): + with renku_project_context(tempdir): return self.renku_op() def renku_op(self): diff --git a/renku/ui/service/controllers/templates_create_project.py b/renku/ui/service/controllers/templates_create_project.py index 0e5cf3d6b8..6795d076c1 100644 --- a/renku/ui/service/controllers/templates_create_project.py +++ b/renku/ui/service/controllers/templates_create_project.py @@ -23,7 +23,7 @@ from marshmallow import EXCLUDE from renku.command.init import create_from_template_local_command -from renku.core.util.contexts import click_context +from renku.core.util.contexts import renku_project_context from renku.domain_model.template import TEMPLATE_MANIFEST, TemplatesManifest from renku.infrastructure.repository import Repository from renku.ui.service.config import MESSAGE_PREFIX @@ -147,7 +147,7 @@ def new_project(self): source_path = template_project.abs_path / self.ctx["identifier"] - with click_context(new_project_path, "create_from_template"): + with renku_project_context(new_project_path): create_from_template_local_command().build().execute( source_path, name=self.ctx["project_name"], diff --git a/renku/ui/service/controllers/utils/remote_project.py b/renku/ui/service/controllers/utils/remote_project.py index 7847d4b26f..344e66f256 
100644 --- a/renku/ui/service/controllers/utils/remote_project.py +++ b/renku/ui/service/controllers/utils/remote_project.py @@ -24,7 +24,7 @@ from marshmallow import EXCLUDE from renku.core import errors -from renku.core.util.contexts import click_context +from renku.core.util.contexts import renku_project_context from renku.infrastructure.repository import Repository from renku.ui.service.serializers.cache import ProjectCloneContext @@ -74,5 +74,5 @@ def remote(self): raise - with click_context(td, "remote_project"): + with renku_project_context(td): yield td diff --git a/renku/ui/service/controllers/version.py b/renku/ui/service/controllers/version.py index 132f4eee3b..facb81c412 100644 --- a/renku/ui/service/controllers/version.py +++ b/renku/ui/service/controllers/version.py @@ -17,7 +17,7 @@ # limitations under the License. """Renku service version controller.""" from renku import __version__ -from renku.core.management.migrate import SUPPORTED_PROJECT_VERSION +from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION from renku.ui.service.controllers.api.abstract import ServiceCtrl from renku.ui.service.serializers.version import VersionResponseRPC from renku.ui.service.views import result_response diff --git a/renku/ui/service/jobs/datasets.py b/renku/ui/service/jobs/datasets.py index 750f151363..1baeb0b913 100644 --- a/renku/ui/service/jobs/datasets.py +++ b/renku/ui/service/jobs/datasets.py @@ -22,7 +22,7 @@ from renku.command.dataset import add_to_dataset_command, import_dataset_command from renku.core import errors -from renku.core.util.contexts import click_context +from renku.core.util.contexts import renku_project_context from renku.core.util.git import push_changes from renku.domain_model.git import GitURL from renku.infrastructure.repository import Repository @@ -55,7 +55,7 @@ def dataset_import( try: worker_log.debug(f"retrieving metadata for project {project_id}") project = cache.get_project(user, project_id) - with click_context(project.abs_path, "dataset_import"): + with renku_project_context(project.abs_path): worker_log.debug(f"project found in cache - importing dataset {dataset_uri}") communicator = ServiceCallback(user_job=user_job) @@ -107,7 +107,7 @@ def dataset_add_remote_file(cache, user, user_job_id, project_id, create_dataset worker_log.debug(f"checking metadata for project {project_id}") project = cache.get_project(user, project_id) - with click_context(project.abs_path, "dataset_add_remote_file"): + with renku_project_context(project.abs_path): urls = url if isinstance(url, list) else [url] worker_log.debug(f"adding files {urls} to dataset {name}") diff --git a/tests/api/test_activity.py b/tests/api/test_activity.py index 1c9860b3fa..f7f136a09d 100644 --- a/tests/api/test_activity.py +++ b/tests/api/test_activity.py @@ -103,9 +103,9 @@ def test_get_activity_upstreams(project_with_runs): assert "plan-1" == upstreams[0].plan.name -def test_filter_activities(project_with_runs, with_injections_manager): +def test_filter_activities(project_with_runs, with_injection): """Test Activity.filter method.""" - with with_injections_manager(project_with_runs): + with with_injection(project_with_runs): activity_gateway = ActivityGateway() activity = next(a for a in activity_gateway.get_all_activities() if a.association.plan.name == "plan-2") plan = activity.association.plan diff --git a/tests/api/test_dataset.py b/tests/api/test_dataset.py index ac72610d9e..e5d042544f 100644 --- a/tests/api/test_dataset.py +++ b/tests/api/test_dataset.py @@ -31,6 +31,9 @@ def 
test_list_datasets(project_with_datasets): assert {"dataset-1", "dataset-2"} == {d.name for d in datasets} + dataset = next(d for d in Dataset.list() if d.name == "dataset-2") + assert {"P1", "P2"} == {c.name for c in dataset.creators} + def test_list_datasets_outside_a_context(project_with_datasets): """Test listing datasets outside a project context.""" diff --git a/tests/api/test_parameter.py b/tests/api/test_parameter.py index 7267c63249..036f2c993f 100644 --- a/tests/api/test_parameter.py +++ b/tests/api/test_parameter.py @@ -26,7 +26,6 @@ get_indirect_outputs_path, read_indirect_parameters, ) -from renku.domain_model.project_context import project_context from renku.ui.api import Input, Output, Parameter, Project @@ -87,8 +86,8 @@ def test_indirect_inputs_outputs(project): assert Path(path_1) == input_1.path assert Path(path_2) == output_2.path - input_content = get_indirect_inputs_path(project_context.path).read_text() - output_content = get_indirect_outputs_path(project_context.path).read_text() + input_content = get_indirect_inputs_path(project.path).read_text() + output_content = get_indirect_outputs_path(project.path).read_text() assert path_1 == list(yaml.safe_load(input_content).values())[0] assert input_1.name == list(yaml.safe_load(input_content).keys())[0] @@ -101,7 +100,7 @@ def test_open_inputs(project): with open(Input("input-1", "input.txt"), "w") as f: f.write("some data") - assert "some data" == (project_context.path / "input.txt").read_text() + assert "some data" == (project.path / "input.txt").read_text() def test_open_outputs(project): @@ -109,7 +108,7 @@ def test_open_outputs(project): with open(Output("output-1", "output.txt"), "w") as f: f.write("some data") - assert "some data" == (project_context.path / "output.txt").read_text() + assert "some data" == (project.path / "output.txt").read_text() def test_parameters(project): @@ -123,7 +122,7 @@ def test_parameters(project): assert (42, "42", 42.42) == (p1.value, p2.value, p3.value) - data = read_indirect_parameters(project_context.path) + data = read_indirect_parameters(project.path) assert {"parameter 1", "param-2", "parameter_3 "} == set(data.keys()) assert {42, "42", 42.42} == set(data.values()) diff --git a/tests/api/test_project.py b/tests/api/test_project.py index 2f4912289f..bac1f6630b 100644 --- a/tests/api/test_project.py +++ b/tests/api/test_project.py @@ -21,7 +21,6 @@ import pytest -from renku.domain_model.project_context import project_context from renku.ui.api import Project from renku.ui.cli import cli from tests.utils import format_result_exception, write_and_commit_file @@ -30,12 +29,12 @@ @pytest.mark.parametrize("sub_path", [".", "src", "src/notebooks"]) def test_get_project(project, sub_path): """Test getting Project context within a repository.""" - working_dir = project_context.path / sub_path + working_dir = project.path / sub_path working_dir.mkdir(exist_ok=True, parents=True) os.chdir(working_dir) - with Project() as project: - assert project_context.path == project.path + with Project() as project_object: + assert project.path == project_object.path def test_get_project_multiple(project): @@ -69,12 +68,10 @@ def test_get_project_outside_a_renku_project(directory_tree): def test_status(runner, project): """Test status check.""" - source = project_context.path / "source.txt" - output = project_context.path / "data" / "output.txt" - - repository = project_context.repository + source = project.path / "source.txt" + output = project.path / "data" / "output.txt" - 
write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") result = runner.invoke(cli, ["run", "cp", source, output]) assert 0 == result.exit_code, format_result_exception(result) @@ -82,7 +79,7 @@ def test_status(runner, project): result = runner.invoke(cli, ["run", "cat", "--no-output", source]) assert 0 == result.exit_code, format_result_exception(result) - write_and_commit_file(repository, source, "new content") + write_and_commit_file(project.repository, source, "new content") result = Project().status() diff --git a/tests/cli/fixtures/cli_old_projects.py b/tests/cli/fixtures/cli_old_projects.py index 97829b4b15..6353f1ef79 100644 --- a/tests/cli/fixtures/cli_old_projects.py +++ b/tests/cli/fixtures/cli_old_projects.py @@ -18,18 +18,19 @@ """Renku CLI fixtures for old project management.""" from pathlib import Path -from typing import Generator +from typing import Generator, List, Tuple import pytest -from renku.core.git import with_project_metadata +from renku.core.util.contexts import with_project_metadata from renku.domain_model.project_context import project_context from renku.infrastructure.repository import Repository +from tests.fixtures.repository import RenkuProject from tests.utils import clone_compressed_repository @pytest.fixture(params=["old-datasets-v0.3.0.git", "old-datasets-v0.5.1.git", "test-renku-v0.3.0.git"]) -def old_project(request, tmp_path) -> Generator[Repository, None, None]: +def old_project(request, tmp_path) -> Generator[RenkuProject, None, None]: """Prepares a testing repo created by old version of renku.""" from renku.core.util.contexts import chdir @@ -38,14 +39,14 @@ def old_project(request, tmp_path) -> Generator[Repository, None, None]: repository = clone_compressed_repository(base_path=base_path, name=name) with chdir(repository.path): - yield repository + yield RenkuProject(path=repository.path, repository=repository) @pytest.fixture( params=[ { "name": "old-workflows-v0.10.3.git", - "log_path": "catoutput.txt", + "log_path": "cat-output.txt", "expected_strings": [ "catoutput.txt", "stdin.txt", @@ -63,26 +64,20 @@ def old_project(request, tmp_path) -> Generator[Repository, None, None]: }, ], ) -def old_workflow_project(request, tmp_path): +def old_workflow_project(request, tmp_path) -> Generator[Tuple[RenkuProject, List[str]], None, None]: """Prepares a testing repo created by old version of renku.""" from renku.core.util.contexts import chdir name = request.param["name"] base_path = tmp_path / name repository = clone_compressed_repository(base_path=base_path, name=name) - repository_path = repository.path - with chdir(repository_path): - yield { - "repo": repository, - "path": repository_path, - "log_path": request.param["log_path"], - "expected_strings": request.param["expected_strings"], - } + with chdir(repository.path): + yield RenkuProject(path=repository.path, repository=repository), request.param["expected_strings"] @pytest.fixture(params=["old-datasets-v0.9.1.git"]) -def old_dataset_project(request, tmp_path): +def old_dataset_project(request, tmp_path) -> Generator[RenkuProject, None, None]: """Prepares a testing repo created by old version of renku.""" from renku.core.util.contexts import chdir @@ -91,11 +86,11 @@ def old_dataset_project(request, tmp_path): repository = clone_compressed_repository(base_path=base_path, name=name) with chdir(repository.path), project_context.with_path(repository.path): - yield repository + yield RenkuProject(path=repository.path, 
repository=repository) @pytest.fixture -def old_repository_with_submodules(tmp_path): +def old_repository_with_submodules(tmp_path) -> Generator[RenkuProject, None, None]: """Prepares a testing repo that has datasets using git submodules.""" import tarfile @@ -109,21 +104,21 @@ def old_repository_with_submodules(tmp_path): repo.extractall(working_dir) repo_path = working_dir / name - repo = Repository(repo_path) + repository = Repository(repo_path) with chdir(repo_path): - yield repo + yield RenkuProject(path=repository.path, repository=repository) @pytest.fixture -def unsupported_project(project, client_database_injection_manager): - """A client with a newer project version.""" - with client_database_injection_manager(project): +def unsupported_project(project, with_injection) -> Generator[RenkuProject, None, None]: + """A newer project version.""" + with with_injection(): with with_project_metadata() as project_metadata: impossible_newer_version = 42000 project_metadata.version = impossible_newer_version - project.add(".renku") - project.commit("update renku.ini", no_verify=True) + project.repository.add(".renku") + project.repository.commit("update renku.ini", no_verify=True) yield project diff --git a/tests/cli/fixtures/cli_projects.py b/tests/cli/fixtures/cli_projects.py index 6e4f49289d..b19d3dad6b 100644 --- a/tests/cli/fixtures/cli_projects.py +++ b/tests/cli/fixtures/cli_projects.py @@ -25,6 +25,7 @@ from renku.core.config import set_value from renku.infrastructure.repository import Repository +from tests.fixtures.repository import RenkuProject @pytest.fixture() @@ -40,55 +41,55 @@ def sleep_after(): @pytest.fixture -def project_with_remote(repository, tmpdir) -> Generator["Repository", None, None]: - """Return a client with a (local) remote set.""" +def project_with_remote(project, tmpdir) -> Generator[RenkuProject, None, None]: + """Return a project with a (local) remote set.""" # NOTE: Create a remote repository path = tmpdir.mkdir("remote") Repository.initialize(path, bare=True) - repository.remotes.add(name="origin", url=path) - repository.push("origin", "master", set_upstream=True) + project.repository.remotes.add(name="origin", url=path) + project.repository.push("origin", "master", set_upstream=True) try: - yield repository + yield project finally: - repository.checkout("master") - repository.run_git_command("branch", "--unset-upstream") - repository.remotes.remove("origin") + project.repository.checkout("master") + project.repository.run_git_command("branch", "--unset-upstream") + project.repository.remotes.remove("origin") shutil.rmtree(path) @pytest.fixture -def no_lfs_warning(repository): +def no_lfs_warning(project): """Sets show_lfs_message to False. For those times in life when mocking just isn't enough. 
""" set_value("renku", "show_lfs_message", "False") - repository.add(all=True) - repository.commit(message="Unset show_lfs_message") + project.repository.add(all=True) + project.repository.commit(message="Unset show_lfs_message") yield @pytest.fixture -def client_with_lfs_warning(repository): +def project_with_lfs_warning(project): """Return a Renku repository with lfs warnings active.""" from renku.domain_model.project_context import project_context - with project_context.with_path(repository.path): + with project_context.with_path(project.path): set_value("renku", "lfs_threshold", "0b") set_value("renku", "show_lfs_message", "True") - repository.add(".renku/renku.ini") - repository.commit("update renku.ini") + project.repository.add(".renku/renku.ini") + project.repository.commit("update renku.ini") yield @pytest.fixture(params=[".", "some/sub/directory"]) -def subdirectory(project, request): +def subdirectory(request) -> Generator[Path, None, None]: """Runs tests in root directory and a subdirectory.""" from renku.core.util.contexts import chdir diff --git a/tests/cli/fixtures/cli_providers.py b/tests/cli/fixtures/cli_providers.py index 482e91a591..ca76372b15 100644 --- a/tests/cli/fixtures/cli_providers.py +++ b/tests/cli/fixtures/cli_providers.py @@ -30,24 +30,24 @@ @pytest.fixture -def zenodo_sandbox(repository): +def zenodo_sandbox(project): """Configure environment to use Zenodo sandbox environment.""" os.environ["ZENODO_USE_SANDBOX"] = "true" access_token = os.getenv("ZENODO_ACCESS_TOKEN", "") set_value("zenodo", "access_token", access_token) - repository.add(".renku/renku.ini") - repository.commit("update renku.ini") + project.repository.add(".renku/renku.ini") + project.repository.commit("update renku.ini") @pytest.fixture -def olos_sandbox(repository): +def olos_sandbox(project): """Configure environment to use Zenodo sandbox environment.""" access_token = os.getenv("OLOS_ACCESS_TOKEN", "") set_value("olos", "access_token", access_token) - repository.add(".renku/renku.ini") - repository.commit("update renku.ini") + project.repository.add(".renku/renku.ini") + project.repository.commit("update renku.ini") @pytest.fixture(scope="module") @@ -86,14 +86,14 @@ def remove_datasets(): @pytest.fixture -def dataverse_demo(repository, dataverse_demo_cleanup): +def dataverse_demo(project, dataverse_demo_cleanup): """Configure environment to use Dataverse demo environment.""" access_token = os.getenv("DATAVERSE_ACCESS_TOKEN", "") set_value("dataverse", "access_token", access_token) set_value("dataverse", "server_url", "https://demo.dataverse.org") - repository.add(".renku/renku.ini") - repository.commit("renku.ini") + project.repository.add(".renku/renku.ini") + project.repository.commit("renku.ini") @pytest.fixture diff --git a/tests/cli/fixtures/cli_runner.py b/tests/cli/fixtures/cli_runner.py index 2179087d65..316804e3a5 100644 --- a/tests/cli/fixtures/cli_runner.py +++ b/tests/cli/fixtures/cli_runner.py @@ -28,7 +28,7 @@ @pytest.fixture -def renku_cli(client, run, client_database_injection_manager): +def renku_cli(project, run, with_injection): """Return a callable Renku CLI. It returns the exit code and the resulting activity or list of activities. 
@@ -42,14 +42,14 @@ def renku_cli_(*args, **kwargs) -> Tuple[int, Union[None, Activity, List[Activit def _get_activities(activity_gateway: IActivityGateway): return {a.id: a for a in activity_gateway.get_all_activities()} - with client_database_injection_manager(client): + with with_injection(project): activities_before = _get_activities() str_args = [str(a) for a in args] exit_code = run(str_args, **kwargs) - with client_database_injection_manager(client): + with with_injection(project): activities_after = _get_activities() new_activities = [a for id, a in activities_after.items() if id not in activities_before] diff --git a/tests/cli/test_config.py b/tests/cli/test_config.py index d80f10655b..300a4adb64 100644 --- a/tests/cli/test_config.py +++ b/tests/cli/test_config.py @@ -25,12 +25,11 @@ import pytest import renku.core.config -from renku.domain_model.project_context import project_context from renku.ui.cli import cli from tests.utils import format_result_exception, retry_failed -def test_config_value_locally(client, runner, project): +def test_config_value_locally(runner, project): """Check setting/getting from local configuration.""" result = runner.invoke(cli, ["config", "set", "key", "local-value"]) assert 0 == result.exit_code, format_result_exception(result) @@ -47,7 +46,7 @@ def test_config_value_locally(client, runner, project): assert 2 == result.exit_code -def test_config_value_globally(client, runner, project): +def test_config_value_globally(runner, project): """Check setting/getting from global configuration.""" result = runner.invoke(cli, ["config", "set", "key", "global-value", "--global"]) assert 0 == result.exit_code, format_result_exception(result) @@ -63,7 +62,7 @@ def test_config_value_globally(client, runner, project): assert 2 == result.exit_code -def test_config_default(client, runner, project): +def test_config_default(runner, project): """Check setting/getting from local configuration.""" result = runner.invoke(cli, ["config", "set", "lfs_threshold", "0b"]) assert 0 == result.exit_code, format_result_exception(result) @@ -84,13 +83,13 @@ def test_config_default(client, runner, project): assert result.output == "100kb\n" -def test_config_get_non_existing_value(client, runner, project): +def test_config_get_non_existing_value(runner, project): """Check getting non-existing value is an error.""" result = runner.invoke(cli, ["config", "show", "non-existing"]) assert 2 == result.exit_code -def test_local_overrides_global_config(client, runner, project): +def test_local_overrides_global_config(runner, project): """Test setting config both global and locally.""" result = runner.invoke(cli, ["config", "set", "key", "global-value", "--global"]) assert 0 == result.exit_code, format_result_exception(result) @@ -108,7 +107,7 @@ def test_local_overrides_global_config(client, runner, project): @pytest.mark.parametrize("global_only", (False, True)) -def test_config_remove_value_locally(client, runner, project, global_only): +def test_config_remove_value_locally(runner, project, global_only): """Check removing value from local configuration.""" param = ["--global"] if global_only else [] result = runner.invoke(cli, ["config", "set", "key", "some-value"] + param) @@ -124,27 +123,27 @@ def test_config_remove_value_locally(client, runner, project, global_only): assert "some-value" not in result.output -def test_local_config_committed(client, runner, data_repository): +def test_local_config_committed(project, runner, data_repository): """Test local configuration update is 
committed only when it is changed.""" - commit_sha_before = client.head.commit.hexsha + commit_sha_before = project.repository.head.commit.hexsha result = runner.invoke(cli, ["config", "set", "local-key", "value"]) assert 0 == result.exit_code, format_result_exception(result) - commit_sha_after = client.head.commit.hexsha + commit_sha_after = project.repository.head.commit.hexsha assert commit_sha_after != commit_sha_before # Adding the same config should not create a new commit - commit_sha_before = project_context.repository.head.commit.hexsha + commit_sha_before = project.repository.head.commit.hexsha result = runner.invoke(cli, ["config", "set", "local-key", "value"]) assert 0 == result.exit_code, format_result_exception(result) - commit_sha_after = project_context.repository.head.commit.hexsha + commit_sha_after = project.repository.head.commit.hexsha assert commit_sha_after == commit_sha_before # Adding a global config should not create a new commit result = runner.invoke(cli, ["config", "set", "global-key", "value", "--global"]) assert 0 == result.exit_code, format_result_exception(result) - commit_sha_after = project_context.repository.head.commit.hexsha + commit_sha_after = project.repository.head.commit.hexsha assert commit_sha_after == commit_sha_before @@ -157,7 +156,7 @@ def test_local_config_committed(client, runner, data_repository): ), ], ) -def test_invalid_command_args(client, runner, project, args, message): +def test_invalid_command_args(runner, project, args, message): """Test invalid combination of command-line arguments.""" result = runner.invoke(cli, ["config"] + args) assert 2 == result.exit_code @@ -165,7 +164,7 @@ def test_invalid_command_args(client, runner, project, args, message): @pytest.mark.parametrize("config_key", ["data_directory"]) -def test_readonly_config(client, runner, project, config_key): +def test_readonly_config(runner, project, config_key): """Test readonly config can only be set once.""" result = runner.invoke(cli, ["config", "set", config_key, "value"]) assert 0 == result.exit_code, format_result_exception(result) @@ -179,7 +178,7 @@ def test_readonly_config(client, runner, project, config_key): assert f"Configuration {config_key} cannot be modified." in result.output -def test_config_read_concurrency(runner, project, client, run): +def test_config_read_concurrency(runner, project, run): """Test config can be read concurrently.""" result = runner.invoke(cli, ["config", "set", "test", "value"]) assert 0 == result.exit_code, format_result_exception(result) @@ -206,7 +205,7 @@ def test_config_read_concurrency(runner, project, client, run): @retry_failed -def test_config_write_concurrency(monkeypatch, runner, project, client, run): +def test_config_write_concurrency(monkeypatch, runner, project, run): """Test config cannot be written concurrently. 
Only one execution succeeds in that case.""" REPETITIONS = 4 CONFIG_KEY = "write_key" @@ -262,7 +261,7 @@ def single_true(iterable): @pytest.mark.parametrize("value", ["%value", "${value}"]) -def test_config_interpolation_is_disabled(client, runner, value): +def test_config_interpolation_is_disabled(project, runner, value): """Test ConfigParser interpolation is disabled.""" result = runner.invoke(cli, ["config", "set", "key", value]) @@ -274,22 +273,20 @@ def test_config_interpolation_is_disabled(client, runner, value): assert f"{value}\n" == result.output -def test_config_commit(client, runner, data_repository): +def test_config_commit(project, runner, data_repository): """Test config changes only commits the renku config file.""" - commit_sha_before = project_context.repository.head.commit.hexsha + commit_sha_before = project.repository.head.commit.hexsha - (project_context.path / "untracked").write_text("untracked") - (project_context.path / "staged").write_text("staged") - project_context.repository.add("staged") + (project.path / "untracked").write_text("untracked") + (project.path / "staged").write_text("staged") + project.repository.add("staged") result = runner.invoke(cli, ["config", "set", "key", "value"]) assert 0 == result.exit_code, format_result_exception(result) - assert {os.path.join(".renku", "renku.ini")} == { - f.a_path for f in project_context.repository.head.commit.get_changes() - } - assert {"untracked"} == set(project_context.repository.untracked_files) - assert {"staged"} == {f.a_path for f in project_context.repository.staged_changes} + assert {os.path.join(".renku", "renku.ini")} == {f.a_path for f in project.repository.head.commit.get_changes()} + assert {"untracked"} == set(project.repository.untracked_files) + assert {"staged"} == {f.a_path for f in project.repository.staged_changes} - commit_sha_after = project_context.repository.head.commit.hexsha + commit_sha_after = project.repository.head.commit.hexsha assert commit_sha_after != commit_sha_before diff --git a/tests/cli/test_datasets.py b/tests/cli/test_datasets.py index 2bd59d0dc4..b7a619d5d3 100644 --- a/tests/cli/test_datasets.py +++ b/tests/cli/test_datasets.py @@ -39,25 +39,30 @@ from renku.core.util.git import get_dirty_paths from renku.core.util.urls import get_slug from renku.domain_model.dataset import Dataset -from renku.domain_model.project_context import project_context from renku.ui.cli import cli -from tests.utils import assert_dataset_is_mutated, format_result_exception, write_and_commit_file +from tests.utils import ( + assert_dataset_is_mutated, + format_result_exception, + get_dataset_with_injection, + get_datasets_provenance_with_injection, + write_and_commit_file, +) -def test_datasets_create_clean(runner, project, client, load_dataset_with_injection): +def test_datasets_create_clean(runner, project): """Test creating a dataset in clean repository.""" result = runner.invoke(cli, ["dataset", "create", "dataset"]) assert 0 == result.exit_code, format_result_exception(result) assert "OK" in result.output - dataset = load_dataset_with_injection("dataset", client) + dataset = get_dataset_with_injection("dataset") assert isinstance(dataset, Dataset) assert Path("data/dataset/") == dataset.get_datadir() - assert not project_context.repository.is_dirty(untracked_files=True) + assert not project.repository.is_dirty(untracked_files=True) -def test_datasets_create_clean_with_datadir(runner, project, client, load_dataset_with_injection): +def test_datasets_create_clean_with_datadir(runner, project): 
"""Test creating a dataset in clean repository.""" datadir = Path("my/data/dir") @@ -66,14 +71,14 @@ def test_datasets_create_clean_with_datadir(runner, project, client, load_datase assert 0 == result.exit_code, format_result_exception(result) assert "OK" in result.output - dataset = load_dataset_with_injection("dataset", client) + dataset = get_dataset_with_injection("dataset") assert isinstance(dataset, Dataset) assert datadir == dataset.get_datadir() - assert not project_context.repository.is_dirty(untracked_files=True) + assert not project.repository.is_dirty(untracked_files=True) -def test_datasets_create_with_datadir_with_files(runner, project, client, load_dataset_with_injection): +def test_datasets_create_with_datadir_with_files(runner, project): """Test creating a dataset in clean repository.""" datadir = Path("my/data/dir") @@ -86,35 +91,37 @@ def test_datasets_create_with_datadir_with_files(runner, project, client, load_d assert 0 == result.exit_code, format_result_exception(result) assert "OK" in result.output - dataset = load_dataset_with_injection("dataset", client) + dataset = get_dataset_with_injection("dataset") assert isinstance(dataset, Dataset) assert datadir == dataset.get_datadir() assert dataset.find_file(file) - assert not project_context.repository.is_dirty(untracked_files=True) + assert not project.repository.is_dirty(untracked_files=True) -def test_datasets_create_dirty(runner, project, client, load_dataset_with_injection): +def test_datasets_create_dirty(runner, project): """Test creating a dataset in a dirty repository.""" - (project_context.path / "untracked").write_text("untracked") - (project_context.path / "staged").write_text("staged") - project_context.repository.add("staged") + (project.path / "untracked").write_text("untracked") + (project.path / "staged").write_text("staged") + project.repository.add("staged") result = runner.invoke(cli, ["dataset", "create", "dataset"]) assert 0 == result.exit_code, format_result_exception(result) - dataset = load_dataset_with_injection("dataset", client) + dataset = get_dataset_with_injection("dataset") assert dataset # All staged files will be committed - assert 0 == len(project_context.repository.staged_changes) + assert 0 == len(project.repository.staged_changes) # Untracked files won't be committed - assert {"untracked"} == set(project_context.repository.untracked_files) + assert {"untracked"} == set(project.repository.untracked_files) -@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/my-dataset"), (["--datadir", "mydir"], "mydir")]) -def test_dataset_show(runner, client, subdirectory, datadir_option, datadir): +@pytest.mark.parametrize( + "datadir_option,datadir", [([], f"{DATA_DIR}/my-dataset"), (["--datadir", "my-dir"], "my-dir")] +) +def test_dataset_show(runner, project, subdirectory, datadir_option, datadir): """Test creating and showing a dataset with metadata.""" result = runner.invoke(cli, ["dataset", "show", "my-dataset"]) assert 1 == result.exit_code, format_result_exception(result) @@ -125,7 +132,7 @@ def test_dataset_show(runner, client, subdirectory, datadir_option, datadir): "@type": "https://schema.org/specialType", "https://schema.org/specialProperty": "some_unique_value", } - metadata_path = project_context.path / "metadata.json" + metadata_path = project.path / "metadata.json" metadata_path.write_text(json.dumps(metadata)) result = runner.invoke( @@ -172,7 +179,7 @@ def test_dataset_show(runner, client, subdirectory, datadir_option, datadir): assert "Data Directory:" -def 
-def test_dataset_show_tag(runner, client, subdirectory):
+def test_dataset_show_tag(runner, project, subdirectory):
     """Test creating and showing a dataset with metadata."""
     result = runner.invoke(cli, ["dataset", "show", "my-dataset"])
     assert 1 == result.exit_code, format_result_exception(result)
@@ -183,7 +190,7 @@ def test_dataset_show_tag(runner, client, subdirectory):
         "@type": "https://schema.org/specialType",
         "https://schema.org/specialProperty": "some_unique_value",
     }
-    metadata_path = project_context.path / "metadata.json"
+    metadata_path = project.path / "metadata.json"
     metadata_path.write_text(json.dumps(metadata))
 
     result = runner.invoke(
@@ -243,7 +250,7 @@ def test_dataset_show_tag(runner, client, subdirectory):
     assert "description3" not in result.output
 
 
-def test_datasets_create_different_names(runner, client):
+def test_datasets_create_different_names(runner, project):
    """Test creating datasets with the same title but different names."""
     result = runner.invoke(cli, ["dataset", "create", "dataset-1", "--title", "title"])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -254,7 +261,7 @@ def test_datasets_create_different_names(runner, client):
     assert "OK" in result.output
 
 
-def test_datasets_create_with_same_name(runner, client):
+def test_datasets_create_with_same_name(runner, project):
     """Test creating datasets with the same name."""
     result = runner.invoke(cli, ["dataset", "create", "dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -277,7 +284,7 @@
         "name ends in.lock",
     ],
 )
-def test_datasets_invalid_name(runner, client, name):
+def test_datasets_invalid_name(runner, project, name):
     """Test creating datasets with invalid names."""
     result = runner.invoke(cli, ["dataset", "create", name])
 
@@ -286,10 +293,10 @@ def test_datasets_invalid_name(runner, client, name):
     assert f"Hint: '{get_slug(name)}' is valid" in result.output
 
 
-def test_datasets_create_dirty_exception_untracked(runner, project, client):
+def test_datasets_create_dirty_exception_untracked(runner, project):
     """Test exception raised for an untracked file in the renku directory."""
     # 1. Create a problem.
-    datasets_dir = project_context.database_path
+    datasets_dir = project.database_path
     if not datasets_dir.exists():
         datasets_dir.mkdir()
 
@@ -302,10 +309,10 @@ def test_datasets_create_dirty_exception_untracked(runner, project, client):
     assert ".renku contains uncommitted changes." in result.output
 
 
-def test_datasets_create_dirty_exception_staged(runner, project, client):
+def test_datasets_create_dirty_exception_staged(runner, project):
     """Test exception raised for a staged file in the renku directory."""
     # 1. Create a problem within .renku directory
-    datasets_dir = project_context.database_path
+    datasets_dir = project.database_path
     if not datasets_dir.exists():
         datasets_dir.mkdir()
 
@@ -313,7 +320,7 @@ def test_datasets_create_dirty_exception_staged(runner, project, client):
         fp.write("a")
 
     # 2. Stage a problem without committing it.
-    project_context.repository.add(datasets_dir / "a")
+    project.repository.add(datasets_dir / "a")
 
     # 3. Ensure correct error has been raised.
     result = runner.invoke(cli, ["dataset", "create", "dataset"])
@@ -321,14 +328,14 @@ def test_datasets_create_dirty_exception_staged(runner, project, client):
     assert ".renku contains uncommitted changes." in result.output
 
 
-def test_dataset_create_dirty_exception_all_untracked(runner, project, client):
+def test_dataset_create_dirty_exception_all_untracked(runner, project):
     """Test exception raised for all untracked files."""
     # 1. Create unclean root to enforce ensure checks.
-    with (project_context.path / "a").open("w") as fp:
+    with (project.path / "a").open("w") as fp:
         fp.write("a")
 
     # 2. Create a problem.
-    datasets_dir = project_context.database_path
+    datasets_dir = project.database_path
     if not datasets_dir.exists():
         datasets_dir.mkdir()
 
@@ -341,23 +348,23 @@ def test_dataset_create_dirty_exception_all_untracked(runner, project, client):
     assert ".renku contains uncommitted changes." in result.output
 
 
-def test_datasets_create_dirty_exception_all_staged(runner, project, client):
+def test_datasets_create_dirty_exception_all_staged(runner, project):
     """Test exception raised for all staged files."""
     # 1. Create unclean root to enforce ensure checks.
-    with (project_context.path / "a").open("w") as fp:
+    with (project.path / "a").open("w") as fp:
         fp.write("a")
 
-    project_context.repository.add("a")
+    project.repository.add("a")
 
     # 2. Create a problem.
-    datasets_dir = project_context.database_path
+    datasets_dir = project.database_path
     if not datasets_dir.exists():
         datasets_dir.mkdir()
 
     with (datasets_dir / "a").open("w") as fp:
         fp.write("a")
 
-    project_context.repository.add(datasets_dir / "a")
+    project.repository.add(datasets_dir / "a")
 
     # 3. Ensure correct error has been raised.
     result = runner.invoke(cli, ["dataset", "create", "dataset"])
@@ -365,19 +372,19 @@ def test_datasets_create_dirty_exception_all_staged(runner, project, client):
     assert ".renku contains uncommitted changes." in result.output
 
 
-def test_dataset_create_exception_refs(runner, project, client):
+def test_dataset_create_exception_refs(runner, project):
     """Test untracked/unstaged exception raised in a dirty renku home dir."""
-    with (project_context.path / "a").open("w") as fp:
+    with (project.path / "a").open("w") as fp:
         fp.write("a")
 
-    datasets_dir = project_context.database_path
+    datasets_dir = project.database_path
     if not datasets_dir.exists():
         datasets_dir.mkdir()
 
     with (datasets_dir / "a").open("w") as fp:
         fp.write("a")
 
-    refs_dir = project_context.path / RENKU_HOME / REFS
+    refs_dir = project.path / RENKU_HOME / REFS
     if not refs_dir.exists():
         refs_dir.mkdir()
 
@@ -398,7 +405,7 @@
         ("John Doe", "Email"),
     ],
 )
-def test_dataset_creator_is_invalid(client, runner, creator, field):
+def test_dataset_creator_is_invalid(runner, project, creator, field):
     """Test creating a dataset with an invalid creator format."""
     result = runner.invoke(cli, ["dataset", "create", "ds", "-c", creator])
     assert 2 == result.exit_code
@@ -414,7 +421,9 @@ def test_datasets_list_empty(output_format, runner, project):
 
 
 @pytest.mark.parametrize("output_format", DATASETS_FORMATS.keys())
-@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/my-dataset"), (["--datadir", "mydir"], "mydir")])
+@pytest.mark.parametrize(
+    "datadir_option,datadir", [([], f"{DATA_DIR}/my-dataset"), (["--datadir", "my-dir"], "my-dir")]
+)
 def test_datasets_list_non_empty(output_format, runner, project, datadir_option, datadir):
     """Test listing with datasets."""
     format_option = "--format={0}".format(output_format)
@@ -488,10 +497,10 @@ def test_datasets_list_description(runner, project):
         assert description[: len(short_description) + 1] not in line
 
 
-@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/new-dataset"), (["--datadir", "mydir"], "mydir")])
-def test_add_and_create_dataset(
-    directory_tree, runner, project, client, subdirectory, load_dataset_with_injection, datadir_option, datadir
-):
+@pytest.mark.parametrize(
+    "datadir_option,datadir", [([], f"{DATA_DIR}/new-dataset"), (["--datadir", "my-dir"], "my-dir")]
+)
+def test_add_and_create_dataset(directory_tree, runner, project, subdirectory, datadir_option, datadir):
     """Test adding data to a non-existing dataset."""
     result = runner.invoke(
         cli, ["dataset", "add", "--copy", "new-dataset", str(directory_tree)], catch_exceptions=False
@@ -499,11 +508,11 @@ def test_add_and_create_dataset(
     )
     assert 1 == result.exit_code
     assert 'Dataset "new-dataset" does not exist.' in result.output
 
-    existing_file = project_context.path / datadir / "myfolder" / "myfile"
+    existing_file = project.path / datadir / "my-folder" / "my-file"
     existing_file.parent.mkdir(parents=True, exist_ok=True)
     existing_file.write_text("content")
 
-    existing_folder = project_context.path / datadir / "my_other_folder"
+    existing_folder = project.path / datadir / "my_other_folder"
     existing_folder.mkdir(parents=True, exist_ok=True)
 
     # Add succeeds with --create
@@ -514,15 +523,15 @@ def test_add_and_create_dataset(
     )
     assert 0 == result.exit_code, format_result_exception(result)
 
-    path1 = os.path.join(project_context.path, datadir, directory_tree.name, "file1")
-    path2 = os.path.join(project_context.path, datadir, directory_tree.name, "dir1", "file2")
-    path3 = os.path.join(project_context.path, datadir, directory_tree.name, "dir1", "file3")
+    path1 = os.path.join(project.path, datadir, directory_tree.name, "file1")
+    path2 = os.path.join(project.path, datadir, directory_tree.name, "dir1", "file2")
+    path3 = os.path.join(project.path, datadir, directory_tree.name, "dir1", "file3")
     assert os.stat(path1)
     assert os.stat(path2)
     assert os.stat(path3)
 
-    dataset = load_dataset_with_injection("new-dataset", client)
-    assert {os.path.relpath(p, project_context.path) for p in [path1, path2, path3, existing_file]} == {
+    dataset = get_dataset_with_injection("new-dataset")
+    assert {os.path.relpath(p, project.path) for p in [path1, path2, path3, existing_file]} == {
         f.entity.path for f in dataset.files
     }
@@ -531,7 +540,7 @@ def test_add_and_create_dataset(
     assert 1 == result.exit_code
 
 
-def test_add_and_create_dataset_with_lfs_warning(directory_tree, runner, project, client_with_lfs_warning):
+def test_add_and_create_dataset_with_lfs_warning(directory_tree, runner, project_with_lfs_warning):
     """Test adding data with an LFS warning."""
 
     # Add succeeds with --create
@@ -544,16 +553,16 @@ def test_add_and_create_dataset_with_lfs_warning(directory_tree, runner, project
     assert "file" in result.output
 
 
-def test_add_to_dirty_repo(directory_tree, runner, project, client):
+def test_add_to_dirty_repo(directory_tree, runner, project):
     """Test adding to a dataset in a dirty repo commits only added files."""
-    with (project_context.path / "tracked").open("w") as fp:
+    with (project.path / "tracked").open("w") as fp:
         fp.write("tracked file")
-    project_context.repository.add(all=True)
-    project_context.repository.commit("tracked file")
+    project.repository.add(all=True)
+    project.repository.commit("tracked file")
 
-    with (project_context.path / "tracked").open("w") as fp:
+    with (project.path / "tracked").open("w") as fp:
         fp.write("modified tracked file")
-    with (project_context.path / "untracked").open("w") as fp:
+    with (project.path / "untracked").open("w") as fp:
         fp.write("untracked file")
     result = runner.invoke(
@@ -561,8 +570,8 @@ def test_add_to_dirty_repo(directory_tree, runner, project, client):
     )
     assert 0 == result.exit_code, format_result_exception(result)
 
-    assert project_context.repository.is_dirty(untracked_files=True)
-    assert ["untracked"] == project_context.repository.untracked_files
+    assert project.repository.is_dirty(untracked_files=True)
+    assert ["untracked"] == project.repository.untracked_files
 
     # Add without making a change
     result = runner.invoke(
@@ -570,18 +579,18 @@ def test_add_to_dirty_repo(directory_tree, runner, project, client):
     )
     assert 1 == result.exit_code
 
-    assert project_context.repository.is_dirty(untracked_files=True)
-    assert ["untracked"] == project_context.repository.untracked_files
+    assert project.repository.is_dirty(untracked_files=True)
+    assert ["untracked"] == project.repository.untracked_files
 
 
-def test_add_unicode_file(tmpdir, runner, project, client):
+def test_add_unicode_file(tmpdir, runner, project):
     """Test adding files with unicode special characters in their names."""
     # create a dataset
     result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
     assert "OK" in result.output
 
-    filename = "filéàèû爱ಠ_ಠ.txt"
+    filename = "fi1é-àèû爱ಠ_ಠ.txt"
     new_file = tmpdir.join(filename)
     new_file.write(str("test"))
 
@@ -594,14 +603,14 @@ def test_add_unicode_file(tmpdir, runner, project, client):
     assert filename in result.output.encode("latin1").decode("unicode-escape")
 
 
-def test_multiple_file_to_dataset(tmpdir, runner, project, client, load_dataset_with_injection):
+def test_multiple_file_to_dataset(tmpdir, runner, project):
     """Test adding multiple files to a dataset at once."""
     # create a dataset
     result = runner.invoke(cli, ["dataset", "create", "dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
     assert "OK" in result.output
 
-    dataset = load_dataset_with_injection("dataset", client)
+    dataset = get_dataset_with_injection("dataset")
     assert dataset.title == "dataset"
 
     paths = []
@@ -618,15 +627,15 @@ def test_multiple_file_to_dataset(tmpdir, runner, project, client, load_dataset_
     assert 0 == result.exit_code, format_result_exception(result)
 
 
-@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/local"), (["--datadir", "mydir"], "mydir")])
-def test_add_with_relative_path(runner, client, directory_tree, subdirectory, datadir_option, datadir):
+@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/local"), (["--datadir", "my-dir"], "my-dir")])
+def test_add_with_relative_path(runner, project, directory_tree, subdirectory, datadir_option, datadir):
     """Test adding data with a relative path."""
     relative_path = os.path.relpath(directory_tree / "file1", os.getcwd())
 
     result = runner.invoke(cli, ["dataset", "add", "--copy", "--create", "local", relative_path] + datadir_option)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    path = project_context.path / datadir / "file1"
+    path = project.path / datadir / "file1"
     assert path.exists()
     assert "file1 content" == path.read_text()
 
@@ -634,17 +643,17 @@
 @pytest.mark.parametrize(
     "action,existing_paths,missing_paths,existing_links",
     [
-        ("--copy", ["myfile", "data/local/myfile"], [], []),
-        ("--move", ["data/local/myfile"], ["myfile"], []),
-        ("--link", ["myfile"], [], ["data/local/myfile"]),
+        ("--copy", ["my-file", "data/local/my-file"], [], []),
+        ("--move", ["data/local/my-file"], ["my-file"], []),
+        ("--link", ["my-file"], [], ["data/local/my-file"]),
     ],
 )
-def test_add_local_actions(runner, client, action, existing_paths, missing_paths, existing_links):
+def test_add_local_actions(runner, project, action, existing_paths, missing_paths, existing_links):
     """Test adding local data with different actions."""
-    with (project_context.path / "myfile").open("w") as fp:
+    with (project.path / "my-file").open("w") as fp:
         fp.write("my file")
 
-    result = runner.invoke(cli, ["dataset", "add", action, "--create", "local", "myfile"])
+    result = runner.invoke(cli, ["dataset", "add", action, "--create", "local", "my-file"])
     assert 0 == result.exit_code, format_result_exception(result)
 
     for existing_path in existing_paths:
@@ -661,7 +670,7 @@ def test_add_local_actions(runner, client, action, existing_paths, missing_paths
         assert path.is_symlink()
 
 
-def test_add_an_empty_directory(runner, client, directory_tree):
+def test_add_an_empty_directory(runner, project, directory_tree):
     """Test adding an empty directory to a dataset."""
     path = directory_tree / "empty-directory"
     path.mkdir()
@@ -671,33 +680,33 @@ def test_add_an_empty_directory(runner, project, directory_tree):
     assert "Error: There are no files to create a dataset" in result.output
 
 
-def test_repository_file_to_dataset(runner, client, subdirectory, load_dataset_with_injection):
+def test_repository_file_to_dataset(runner, project, subdirectory):
     """Test adding a file from the repository into a dataset."""
     # create a dataset
     assert 0 == runner.invoke(cli, ["dataset", "create", "dataset"]).exit_code
 
-    a_path = project_context.path / "a"
+    a_path = project.path / "a"
     a_path.write_text("a content")
 
-    project_context.repository.add(a_path)
-    project_context.repository.commit(message="Added file a", no_verify=True)
+    project.repository.add(a_path)
+    project.repository.commit(message="Added file a", no_verify=True)
 
     result = runner.invoke(cli, ["dataset", "add", "--copy", "dataset", str(a_path)], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    dataset = load_dataset_with_injection("dataset", client)
+    dataset = get_dataset_with_injection("dataset")
     assert dataset.title == "dataset"
     assert dataset.find_file("data/dataset/a") is not None
 
 
-def test_relative_import_to_dataset(tmpdir, runner, client, subdirectory, load_dataset_with_injection):
+def test_relative_import_to_dataset(tmpdir, runner, project, subdirectory):
     """Test importing data from a directory structure."""
     # create a dataset
     result = runner.invoke(cli, ["dataset", "create", "dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
     assert "OK" in result.output
 
-    dataset = load_dataset_with_injection("dataset", client)
+    dataset = get_dataset_with_injection("dataset")
     assert dataset.title == "dataset"
 
     zero_data = tmpdir.join("zero.txt")
@@ -717,9 +726,9 @@ def test_relative_import_to_dataset(tmpdir, runner, client, subdirectory, load_d
     result = runner.invoke(cli, ["dataset", "add", "--copy", "dataset"] + paths, catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    assert os.stat(project_context.path / DATA_DIR / "dataset" / "zero.txt")
-    assert os.stat(project_context.path / DATA_DIR / "dataset" / "first" / "first.txt")
-    assert os.stat(project_context.path / DATA_DIR / "dataset" / "first" / "second" / "second.txt")
+    assert os.stat(project.path / DATA_DIR / "dataset" / "zero.txt")
+    assert os.stat(project.path / DATA_DIR / "dataset" / "first" / "first.txt")
+    assert os.stat(project.path / DATA_DIR / "dataset" / "first" / "second" / "second.txt")
 
 
 @pytest.mark.parametrize(
@@ -729,30 +738,30 @@ def test_relative_import_to_dataset(tmpdir, runner, client, subdirectory, load_d
         (["-s", "file", "/some/local/path"], "Cannot use '-s/--src/--source' with URLs or local files."),
     ],
 )
-def test_usage_error_in_add_from_url(runner, client, params, message):
+def test_usage_error_in_add_from_url(runner, project, params, message):
     """Test usage errors when adding a URL or local file to a dataset."""
     result = runner.invoke(cli, ["dataset", "add", "remote", "--create"] + params, catch_exceptions=False)
     assert 2 == result.exit_code
     assert message in result.output
 
 
-def test_add_untracked_file(runner, project, client, load_dataset_with_injection):
+def test_add_untracked_file(runner, project):
     """Test adding an untracked file to a dataset."""
-    untracked = project_context.path / "untracked"
+    untracked = project.path / "untracked"
     untracked.write_text("untracked")
 
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", "--create", str(untracked)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    assert project_context.repository.is_dirty(untracked_files=True)
-    assert project_context.repository.contains(project_context.path / "data" / "my-dataset" / "untracked")
-    assert load_dataset_with_injection("my-dataset", client).find_file("data/my-dataset/untracked")
+    assert project.repository.is_dirty(untracked_files=True)
+    assert project.repository.contains(project.path / "data" / "my-dataset" / "untracked")
+    assert get_dataset_with_injection("my-dataset").find_file("data/my-dataset/untracked")
 
 
-def test_add_untracked_file_as_external(runner, project, client, load_dataset_with_injection):
+def test_add_untracked_file_as_external(runner, project):
     """Test adding an untracked directory to a dataset as external."""
-    untracked = project_context.path / "untracked"
+    untracked = project.path / "untracked"
     untracked.mkdir(exist_ok=True)
     some_file = untracked / "some-file"
     some_file.write_text("untracked file")
@@ -761,11 +770,11 @@ def test_add_untracked_file_as_external(runner, project, client, load_dataset_wi
     assert 0 == result.exit_code, format_result_exception(result)
 
-    path = project_context.path / DATA_DIR / "my-dataset" / "untracked" / "some-file"
+    path = project.path / DATA_DIR / "my-dataset" / "untracked" / "some-file"
 
-    assert project_context.repository.is_dirty(untracked_files=True)
-    assert not project_context.repository.contains(untracked)
-    assert load_dataset_with_injection("my-dataset", client).find_file(path.relative_to(project_context.path))
+    assert project.repository.is_dirty(untracked_files=True)
+    assert not project.repository.contains(untracked)
+    assert get_dataset_with_injection("my-dataset").find_file(path.relative_to(project.path))
     assert path.is_symlink()
     assert path.resolve() == some_file.resolve()
@@ -776,7 +785,7 @@ def test_add_untracked_file_as_external(runner, project, client, load_dataset_wi
     assert link.startswith("..")
 
 
-def test_add_data_directory(runner, client, directory_tree):
+def test_add_data_directory(runner, project, directory_tree):
     """Test that adding a dataset's data directory to it prints an error."""
     result = runner.invoke(cli, ["dataset", "add", "--copy", "--create", "new-dataset", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -786,7 +795,7 @@
     assert "Cannot recursively add path containing dataset's data directory" in result.output
 
 
-def test_dataset_add_with_copy(tmpdir, runner, project, client, load_dataset_with_injection):
+def test_dataset_add_with_copy(tmpdir, runner, project):
     """Test adding data to a dataset with copy."""
     import os
     import stat
@@ -809,11 +818,11 @@ def test_dataset_add_with_copy(tmpdir, runner, project, client, load_dataset_wit
     assert 0 == result.exit_code, format_result_exception(result)
 
     received_inodes = []
-    dataset = load_dataset_with_injection("my-dataset", client)
+    dataset = get_dataset_with_injection("my-dataset")
     assert dataset.title == "my-dataset"
 
     for file in dataset.files:
-        path = (project_context.path / file.entity.path).resolve()
+        path = (project.path / file.entity.path).resolve()
         received_inodes.append(os.lstat(path)[stat.ST_INO])
 
     # check that original inodes are within created ones
@@ -822,7 +831,7 @@
 
 
 @pytest.mark.serial
-def test_dataset_add_many(tmpdir, runner, project, client):
+def test_dataset_add_many(tmpdir, runner, project):
     """Test adding many files to a dataset."""
     # create a dataset
@@ -840,27 +849,27 @@ def test_dataset_add_many(tmpdir, runner, project, client):
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset"] + paths)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    assert len(project_context.repository.head.commit.message.splitlines()[0]) <= 100
+    assert len(project.repository.head.commit.message.splitlines()[0]) <= 100
 
 
-def test_dataset_file_path_from_subdirectory(runner, client, subdirectory, load_dataset_with_injection):
+def test_dataset_file_path_from_subdirectory(runner, project, subdirectory):
     """Test adding a file to a dataset and check that its path is independent of the CWD."""
     # create a dataset
     result = runner.invoke(cli, ["dataset", "create", "dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
     assert "OK" in result.output
 
-    a_path = project_context.path / "a"
+    a_path = project.path / "a"
     a_path.write_text("a text")
 
-    project_context.repository.add(a_path)
-    project_context.repository.commit(message="Added file a")
+    project.repository.add(a_path)
+    project.repository.commit(message="Added file a")
 
     # add data
     result = runner.invoke(cli, ["dataset", "add", "--copy", "dataset", str(a_path)], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    dataset = load_dataset_with_injection("dataset", client)
+    dataset = get_dataset_with_injection("dataset")
     file = dataset.find_file("data/dataset/a")
     assert file is not None
     assert "data/dataset/a" == file.entity.path
@@ -1040,10 +1049,10 @@ def test_datasets_ls_files_tabular_patterns(runner, project, directory_tree):
     assert "file3" in result.output
 
 
-def test_datasets_ls_files_tabular_creators(runner, client, directory_tree, load_dataset_with_injection):
+def test_datasets_ls_files_tabular_creators(runner, project, directory_tree):
     """Test listing of data within dataset with creator filters."""
     assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", "-c", str(directory_tree)]).exit_code
-    creator = load_dataset_with_injection("my-dataset", client).creators[0].name
+    creator = get_dataset_with_injection("my-dataset").creators[0].name
 
     assert creator is not None
 
@@ -1056,7 +1065,7 @@ def test_datasets_ls_files_tabular_creators(runner, project, directory_tree):
         assert file_.name in result.output
 
 
-def test_datasets_ls_files_correct_paths(runner, client, directory_tree):
+def test_datasets_ls_files_correct_paths(runner, project, directory_tree):
     """Test listing of data within dataset and check that paths are correct."""
     assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", "-c", str(directory_tree)]).exit_code
 
@@ -1070,7 +1079,7 @@ def test_datasets_ls_files_correct_paths(runner, client, directory_tree):
         path = entity.get("http://www.w3.org/ns/prov#atLocation")
         if path:
            path = path[0]["@value"]
-            assert (project_context.path / path).exists()
+            assert (project.path / path).exists()
 
 
 def test_datasets_ls_files_with_name(directory_tree, runner, project):
@@ -1089,14 +1098,14 @@ def test_datasets_ls_files_with_name(directory_tree, runner, project):
     assert "dir1/file2" in result.output
 
 
-def test_datasets_ls_files_correct_size(runner, client, directory_tree, large_file):
+def test_datasets_ls_files_correct_size(runner, project, directory_tree, large_file):
     """Test ls-files shows the size stored in git and not the current file size."""
     assert (
         0
         == runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", "-c", str(directory_tree / "file1")]).exit_code
     )
 
-    path = project_context.path / DATA_DIR / "my-dataset" / "file1"
+    path = project.path / DATA_DIR / "my-dataset" / "file1"
     shutil.copy(large_file, path)
 
     # check include / exclude filters
@@ -1109,17 +1118,15 @@ def test_datasets_ls_files_correct_size(runner, project, directory_tree, large_fi
     assert 13 == size
 
 
-@pytest.mark.skip(reason="FIXME: We don't have commit shas for files. What should be listed here?")
-def test_datasets_ls_files_correct_commit(runner, client, directory_tree):
+@pytest.mark.skip(reason="FIXME: We don't have commit SHAs for files. What should be listed here?")
+def test_datasets_ls_files_correct_commit(runner, project, directory_tree):
     """Test ls-files shows the commit stored in git for each file."""
     assert (
         0
         == runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", "-c", str(directory_tree / "file1")]).exit_code
     )
 
-    commit = project_context.repository.get_previous_commit(
-        path=project_context.path / DATA_DIR / "my-dataset" / "file1"
-    )
+    commit = project.repository.get_previous_commit(path=project.path / DATA_DIR / "my-dataset" / "file1")
 
     # check include / exclude filters
     result = runner.invoke(cli, ["dataset", "ls-files", "--columns=commit,path"])
@@ -1138,7 +1145,7 @@ def test_dataset_unlink_file_not_found(runner, project):
     assert 0 == result.exit_code, format_result_exception(result)
     assert "OK" in result.output
 
-    result = runner.invoke(cli, ["dataset", "unlink", "my-dataset", "--include", "notthere.csv"])
+    result = runner.invoke(cli, ["dataset", "unlink", "my-dataset", "--include", "not-there.csv"])
     assert 2 == result.exit_code, format_result_exception(result)
@@ -1166,7 +1173,7 @@ def test_dataset_unlink_file_abort_unlinking(tmpdir, runner, project):
     assert "Aborted!" in result.output
 
 
-def test_dataset_unlink_file(tmpdir, runner, client, subdirectory, load_dataset_with_injection):
+def test_dataset_unlink_file(tmpdir, runner, project, subdirectory):
     """Test unlinking a file and check its removal from the dataset."""
     # create a dataset
     result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
@@ -1180,40 +1187,40 @@ def test_dataset_unlink_file(tmpdir, runner, client, subdirectory, load_dataset_
     # add data to dataset
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", str(new_file)])
     assert 0 == result.exit_code, format_result_exception(result)
-    assert not project_context.repository.is_dirty(untracked_files=True)
+    assert not project.repository.is_dirty(untracked_files=True)
 
-    dataset = load_dataset_with_injection("my-dataset", client)
+    dataset = get_dataset_with_injection("my-dataset")
     created_dataset_files = [Path(f.entity.path) for f in dataset.files]
     assert new_file.basename in {f.name for f in created_dataset_files}
 
-    commit_sha_before = project_context.repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha
     result = runner.invoke(cli, ["dataset", "unlink", "my-dataset", "--include", new_file.basename, "-y"])
     assert 0 == result.exit_code, format_result_exception(result)
-    assert not project_context.repository.is_dirty(untracked_files=True)
+    assert not project.repository.is_dirty(untracked_files=True)
 
-    commit_sha_after = project_context.repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha
     assert commit_sha_before != commit_sha_after
 
-    dataset = load_dataset_with_injection("my-dataset", client)
+    dataset = get_dataset_with_injection("my-dataset")
     assert new_file.basename not in [Path(f.entity.path).name for f in dataset.files if not f.is_removed()]
     assert all([not f.exists() for f in created_dataset_files])
 
 
-def test_dataset_rm(runner, client, directory_tree, subdirectory, load_dataset_with_injection):
+def test_dataset_rm(runner, project, directory_tree, subdirectory):
     """Test removal of a dataset."""
     assert (
         0
         == runner.invoke(cli, ["dataset", "add", "--copy", "--create", "my-dataset", str(directory_tree)]).exit_code
     )
 
-    assert load_dataset_with_injection("my-dataset", client)
+    assert get_dataset_with_injection("my-dataset")
 
     result = runner.invoke(cli, ["dataset", "rm", "my-dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
     assert "OK" in result.output
-    assert not load_dataset_with_injection("my-dataset", client)
+    assert not get_dataset_with_injection("my-dataset")
 
     result = runner.invoke(cli, ["doctor"], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
@@ -1222,7 +1229,7 @@ def test_dataset_rm(runner, project, directory_tree, subdirectory):
     assert 0 == result.exit_code, format_result_exception(result)
 
 
-def test_dataset_rm_failure(runner, client):
+def test_dataset_rm_failure(runner, project):
     """Test errors in removal of a dataset."""
     assert 2 == runner.invoke(cli, ["dataset", "rm"]).exit_code
     assert 1 == runner.invoke(cli, ["dataset", "rm", "does-not-exist"]).exit_code
@@ -1230,27 +1237,27 @@
 
 
 def test_dataset_overwrite_no_confirm(runner, project):
     """Check dataset overwrite behaviour without confirmation."""
-    result = runner.invoke(cli, ["dataset", "create", "rokstar"])
+    result = runner.invoke(cli, ["dataset", "create", "rockstar"])
     assert 0 == result.exit_code, format_result_exception(result)
     assert "OK" in result.output
 
-    result = runner.invoke(cli, ["dataset", "create", "rokstar"])
+    result = runner.invoke(cli, ["dataset", "create", "rockstar"])
     assert 1 == result.exit_code
     assert "OK" not in result.output
 
 
 @pytest.mark.parametrize("dirty", [False, True])
-def test_dataset_edit(runner, client, project, dirty, subdirectory, load_dataset_with_injection):
+def test_dataset_edit(runner, project, dirty, subdirectory):
     """Check dataset metadata editing."""
     if dirty:
-        (project_context.path / "README.md").write_text("Make repo dirty.")
+        (project.path / "README.md").write_text("Make repo dirty.")
 
     metadata = {
         "@id": "https://example.com/annotation1",
         "@type": "https://schema.org/specialType",
         "https://schema.org/specialProperty": "some_unique_value",
     }
-    metadata_path = project_context.path / "metadata.json"
+    metadata_path = project.path / "metadata.json"
     metadata_path.write_text(json.dumps(metadata))
 
     result = runner.invoke(
@@ -1272,7 +1279,7 @@ def test_dataset_edit(runner, client, project, dirty, subdirectory, load_dataset
     warning_msg = "Warning: No email or wrong format for: Forename2 Surname2"
     assert warning_msg in result.output
 
-    dataset = load_dataset_with_injection("dataset", client)
+    dataset = get_dataset_with_injection("dataset")
     assert " new description " == dataset.description
     assert "original title" == dataset.title
     assert {creator1, creator2}.issubset({c.full_identity for c in dataset.creators})
@@ -1300,7 +1307,7 @@ def test_dataset_edit(runner, project, dirty, subdirectory):
     assert 0 == result.exit_code, format_result_exception(result)
     assert "Successfully updated: custom_metadata." in result.output
 
-    dataset = load_dataset_with_injection("dataset", client)
+    dataset = get_dataset_with_injection("dataset")
     assert " new description " == dataset.description
     assert "new title" == dataset.title
     assert {creator1, creator2}.issubset({c.full_identity for c in dataset.creators})
@@ -1313,17 +1320,17 @@
 
 
 @pytest.mark.parametrize("dirty", [False, True])
-def test_dataset_edit_unset(runner, client, project, dirty, subdirectory, load_dataset_with_injection):
+def test_dataset_edit_unset(runner, project, dirty, subdirectory):
     """Check unsetting values when editing dataset metadata."""
     if dirty:
-        (project_context.path / "README.md").write_text("Make repo dirty.")
+        (project.path / "README.md").write_text("Make repo dirty.")
 
     metadata = {
         "@id": "https://example.com/annotation1",
         "@type": "https://schema.org/specialType",
         "https://schema.org/specialProperty": "some_unique_value",
     }
-    metadata_path = project_context.path / "metadata.json"
+    metadata_path = project.path / "metadata.json"
     metadata_path.write_text(json.dumps(metadata))
 
     result = runner.invoke(
@@ -1352,29 +1359,29 @@ def test_dataset_edit_unset(runner, project, dirty, subdirectory):
     assert 0 == result.exit_code, format_result_exception(result)
     assert "Successfully updated: keywords, custom_metadata." in result.output
-    dataset = load_dataset_with_injection("dataset", client)
+    dataset = get_dataset_with_injection("dataset")
     assert 0 == len(dataset.keywords)
     assert 0 == len(dataset.annotations)
 
 
 @pytest.mark.parametrize("dirty", [False, True])
-def test_dataset_edit_no_change(runner, client, project, dirty):
+def test_dataset_edit_no_change(runner, project, dirty):
     """Check metadata editing does not commit when there is no change."""
     result = runner.invoke(cli, ["dataset", "create", "dataset", "-t", "original title"])
     assert 0 == result.exit_code, format_result_exception(result)
 
     if dirty:
-        (project_context.path / "README.md").write_text("Make repo dirty.")
+        (project.path / "README.md").write_text("Make repo dirty.")
 
-    commit_sha_before = project_context.repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha
     result = runner.invoke(cli, ["dataset", "edit", "dataset"], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
     assert "Nothing to update." in result.output
 
-    commit_sha_after = project_context.repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha
     assert commit_sha_after == commit_sha_before
-    assert dirty is project_context.repository.is_dirty(untracked_files=True)
+    assert dirty is project.repository.is_dirty(untracked_files=True)
 
 
 @pytest.mark.parametrize(
@@ -1401,7 +1408,7 @@ def test_dataset_provider_resolution_dataverse(doi_responses, uri):
     assert type(provider) is DataverseProvider
 
 
-def test_dataset_tag(tmpdir, runner, client, subdirectory, get_datasets_provenance_with_injection):
+def test_dataset_tag(tmpdir, runner, project, subdirectory):
     """Test that dataset tags can be created."""
     result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -1427,7 +1434,7 @@ def test_dataset_tag(tmpdir, runner, project, subdirectory):
     result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "aBc9.34-11_55.t"], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    with get_datasets_provenance_with_injection(client) as datasets_provenance:
+    with get_datasets_provenance_with_injection() as datasets_provenance:
         dataset = datasets_provenance.get_by_name("my-dataset")
         all_tags = datasets_provenance.get_all_tags(dataset)
         assert {dataset.id} == {t.dataset_id.value for t in all_tags}
@@ -1437,7 +1444,7 @@
 
 
 @pytest.mark.parametrize("form", ["tabular", "json-ld"])
-def test_dataset_ls_tags(tmpdir, runner, project, client, form, load_dataset_with_injection):
+def test_dataset_ls_tags(tmpdir, runner, project, form):
     """Test listing of dataset tags."""
     result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -1451,13 +1458,13 @@ def test_dataset_ls_tags(tmpdir, runner, project, form):
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", str(new_file)], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    id1 = load_dataset_with_injection("my-dataset", client).id
+    id1 = get_dataset_with_injection("my-dataset").id
 
     # tag dataset
     result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "1.0", "-d", "first tag!"], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    id2 = load_dataset_with_injection("my-dataset", client).id
+    id2 = get_dataset_with_injection("my-dataset").id
 
     result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "aBc9.34-11_55.t"], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
@@ -1473,7 +1480,7 @@ def test_dataset_ls_tags(tmpdir, runner, project, form):
     assert id2 in result.output
 
 
-def test_dataset_rm_tag(tmpdir, runner, client, subdirectory, load_dataset_with_injection):
+def test_dataset_rm_tag(tmpdir, runner, project, subdirectory):
     """Test removing dataset tags."""
     result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -1487,7 +1494,7 @@ def test_dataset_rm_tag(tmpdir, runner, project, subdirectory):
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", str(new_file)], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
 
-    id1 = load_dataset_with_injection("my-dataset", client).id
+    id1 = get_dataset_with_injection("my-dataset").id
 
     # tag dataset
     result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "1.0", "-d", "first tag!"], catch_exceptions=False)
@@ -1514,7 +1521,7 @@ def test_dataset_rm_tag(tmpdir, runner, project, subdirectory):
     assert 0 == result.exit_code, format_result_exception(result)
 
 
-def test_dataset_rm_tags_multiple(tmpdir, runner, project, client):
+def test_dataset_rm_tags_multiple(tmpdir, runner, project):
     """Test removing multiple dataset tags at once."""
     result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -1540,7 +1547,7 @@ def test_dataset_rm_tags_multiple(tmpdir, runner, project):
     assert "3" not in result.output
 
 
-def test_dataset_rm_tags_failure(tmpdir, runner, project, client):
+def test_dataset_rm_tags_failure(tmpdir, runner, project):
     """Test removing a non-existent dataset tag."""
     result = runner.invoke(cli, ["dataset", "rm-tags", "my-dataset", "1"], catch_exceptions=False)
 
@@ -1561,7 +1568,7 @@
     assert 2 == result.exit_code
 
 
-def test_dataset_clean_up_when_add_fails(runner, client, subdirectory):
+def test_dataset_clean_up_when_add_fails(runner, project, subdirectory):
     """Test project is cleaned when dataset add fails for a new dataset."""
     # add a non-existing path to a new dataset
     result = runner.invoke(
@@ -1569,20 +1576,20 @@
     )
     assert 2 == result.exit_code
 
-    ref = project_context.metadata_path / "refs" / "datasets" / "new-dataset"
+    ref = project.metadata_path / "refs" / "datasets" / "new-dataset"
     assert not ref.is_symlink() and not ref.exists()
 
 
-def test_avoid_empty_commits(runner, client, directory_tree):
+def test_avoid_empty_commits(runner, project, directory_tree):
     """Test no empty commit is created when adding existing data."""
     runner.invoke(cli, ["dataset", "create", "my-dataset"])
 
-    commit_sha_before = project_context.repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    commit_sha_after = project_context.repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha
     assert commit_sha_before != commit_sha_after
 
     commit_sha_before = commit_sha_after
@@ -1590,39 +1597,39 @@ def test_avoid_empty_commits(runner, client, directory_tree):
     assert 1 == result.exit_code
     assert "Error: There is nothing to commit." in result.output
 
-    commit_sha_after = project_context.repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha
     assert commit_sha_before == commit_sha_after
 
 
-def test_multiple_dataset_commits(runner, client, directory_tree):
+def test_multiple_dataset_commits(runner, project, directory_tree):
     """Check adding existing data to multiple datasets."""
-    commit_sha_before = project_context.repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha
 
     result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-dataset1", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    commit_sha_after = project_context.repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha
     assert commit_sha_before != commit_sha_after
 
     commit_sha_before = commit_sha_after
 
     result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-dataset2", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    commit_sha_after = project_context.repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha
     assert commit_sha_before != commit_sha_after
 
 
 @pytest.mark.parametrize("filename", [".renku", ".renku/", "Dockerfile"])
-def test_add_protected_file(runner, client, filename, subdirectory):
+def test_add_protected_file(runner, project, filename, subdirectory):
     """Check adding a protected file."""
-    result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-dataset1", str(project_context.path / filename)])
+    result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-dataset1", str(project.path / filename)])
 
     assert 1 == result.exit_code
     assert "Error: The following paths are protected" in result.output
 
 
-@pytest.mark.parametrize("filename", [".renkunotactuallyrenku", "thisisnot.renku"])
-def test_add_nonprotected_file(runner, client, tmpdir, filename, subdirectory):
+@pytest.mark.parametrize("filename", [".renku-not-actually-renku", "this-is-not.renku"])
+def test_add_non_protected_file(runner, project, tmpdir, filename, subdirectory):
     """Check adding an 'almost' protected file."""
     new_file = tmpdir.join(filename)
     new_file.write(str("test"))
@@ -1632,29 +1639,29 @@
     assert 0 == result.exit_code, format_result_exception(result)
 
 
-def test_add_removes_local_path_information(runner, client, directory_tree, load_dataset_with_injection):
+def test_add_removes_local_path_information(runner, project, directory_tree):
     """Test added local paths are stored as relative paths."""
     result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-dataset", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    dataset = load_dataset_with_injection("my-dataset", client)
-    relative_path = os.path.relpath(directory_tree, project_context.path)
+    dataset = get_dataset_with_injection("my-dataset")
+    relative_path = os.path.relpath(directory_tree, project.path)
 
     for file in dataset.files:
         assert file.source.startswith(relative_path)
         assert file.source.endswith(Path(file.entity.path).name)
 
 
-def test_pull_data_from_lfs(runner, client, tmpdir, subdirectory, no_lfs_size_limit):
+def test_pull_data_from_lfs(runner, project, tmpdir, subdirectory, no_lfs_size_limit):
     """Test pulling data from LFS using relative paths."""
     data = tmpdir.join("data.txt")
     data.write("DATA")
 
     result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-data", str(data)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    attributes = (project_context.path / ".gitattributes").read_text().split()
+    attributes = (project.path / ".gitattributes").read_text().split()
     assert "data/my-data/data.txt" in attributes
 
-    path = project_context.path / DATA_DIR / "my-data" / "data.txt"
+    path = project.path / DATA_DIR / "my-data" / "data.txt"
     relative_path = os.path.relpath(path, os.getcwd())
 
     result = runner.invoke(cli, ["storage", "pull", relative_path])
@@ -1666,12 +1673,12 @@ def test_lfs_hook(project_with_injection, subdirectory, large_file):
     filenames = {"large-file", "large file with whitespace", "large*file?with wildcards"}
     for filename in filenames:
-        shutil.copy(large_file, project_context.path / filename)
-    project_context.repository.add(all=True)
+        shutil.copy(large_file, project_with_injection.path / filename)
+    project_with_injection.repository.add(all=True)
 
     # Commit fails when file is not tracked in LFS
     with pytest.raises(errors.GitCommandError) as e:
-        project_context.repository.commit("large files not in LFS")
+        project_with_injection.repository.commit("large files not in LFS")
 
     assert "You are trying to commit large files to Git" in e.value.stderr
     for filename in filenames:
@@ -1679,11 +1686,13 @@ def test_lfs_hook(project_with_injection, subdirectory, large_file):
 
     # Can be committed after being tracked in LFS
     track_paths_in_storage(*filenames)
-    project_context.repository.add(all=True)
-    commit = project_context.repository.commit("large files tracked")
+    project_with_injection.repository.add(all=True)
+    commit = project_with_injection.repository.commit("large files tracked")
     assert "large files tracked\n" == commit.message
 
-    tracked_lfs_files = set(project_context.repository.run_git_command("lfs", "ls-files", "--name-only").split("\n"))
+    tracked_lfs_files = set(
+        project_with_injection.repository.run_git_command("lfs", "ls-files", "--name-only").split("\n")
+    )
     assert filenames == tracked_lfs_files
 
@@ -1698,10 +1707,10 @@ def test_lfs_hook_autocommit(runner, project, subdirectory, large_file, use_env_
     filenames = {"large-file", "large file with whitespace", "large*file?with wildcards"}
     for filename in filenames:
-        shutil.copy(large_file, project_context.path / filename)
-    project_context.repository.add(all=True)
+        shutil.copy(large_file, project.path / filename)
+    project.repository.add(all=True)
 
-    result = project_context.repository.run_git_command(
+    result = project.repository.run_git_command(
         "commit",
         message="large files not in LFS",
         with_extended_output=True,
@@ -1714,9 +1723,9 @@ def test_lfs_hook_autocommit(runner, project, subdirectory, large_file, use_env_
     assert "Adding files to LFS" in result[2]
     for filename in filenames:
         assert f'Tracking "{filename}"' in result[2]
-    assert len(get_dirty_paths(project)) == 0  # NOTE: make sure repo is clean
+    assert len(get_dirty_paths(project.repository)) == 0  # NOTE: make sure repo is clean
 
-    tracked_lfs_files = set(project_context.repository.run_git_command("lfs", "ls-files", "--name-only").split("\n"))
+    tracked_lfs_files = set(project.repository.run_git_command("lfs", "ls-files", "--name-only").split("\n"))
     assert filenames == tracked_lfs_files
 
@@ -1729,9 +1738,9 @@ def test_lfs_hook_can_be_avoided(runner, project, subdirectory, large_file):
     assert "OK" in result.output
 
 
-def test_datadir_hook(runner, client, subdirectory):
+def test_datadir_hook(runner, project, subdirectory):
     """Test pre-commit hook for checking datadir files."""
-    datadir = project_context.path / "test"
+    datadir = project.path / "test"
     datadir.mkdir()
 
     result = runner.invoke(cli, ["--no-external-storage", "dataset", "create", "--datadir", str(datadir), "my-dataset"])
@@ -1742,11 +1751,11 @@ def test_datadir_hook(runner, client, subdirectory):
     file2 = datadir / "another_file"
     file2.write_text("some updates")
 
-    project_context.repository.add(all=True)
+    project.repository.add(all=True)
 
     # Commit fails when a file in datadir is not added to a dataset
     with pytest.raises(errors.GitCommandError) as e:
-        project_context.repository.commit("datadir files not in dataset")
+        project.repository.commit("datadir files not in dataset")
 
     assert "Files in datasets data directory that aren't up to date" in e.value.stderr
 
@@ -1760,24 +1769,24 @@ def test_datadir_hook(runner, project, subdirectory):
     file3 = datadir / "yet_another_new_file"
     file3.write_text("some updates")
 
-    project_context.repository.add(all=True)
+    project.repository.add(all=True)
 
     # Commit fails when a file in datadir is not added to a dataset
     with pytest.raises(errors.GitCommandError) as e:
-        project_context.repository.commit("datadir files not in dataset")
+        project.repository.commit("datadir files not in dataset")
 
     assert "Files in datasets data directory that aren't up to date" in e.value.stderr
 
     result = runner.invoke(cli, ["config", "set", "check_datadir_files", "false"])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    project_context.repository.add(all=True)
+    project.repository.add(all=True)
 
     # Commit succeeds even though a datadir file is not in a dataset, since the check is disabled
-    project_context.repository.commit("datadir files in dataset")
+    project.repository.commit("datadir files in dataset")
 
 
 @pytest.mark.parametrize("external", [False, True])
-def test_add_existing_files(runner, client, directory_tree, external, no_lfs_size_limit, load_dataset_with_injection):
+def test_add_existing_files(runner, project, directory_tree, external, no_lfs_size_limit):
     """Check adding/overwriting existing files."""
     param = ["--external"] if external else []
 
@@ -1787,7 +1796,7 @@ def test_add_existing_files(runner, project, directory_tree, external, no_lfs_si
     path = Path(DATA_DIR) / "my-dataset" / directory_tree.name / "file1"
 
-    dataset = load_dataset_with_injection("my-dataset", client)
+    dataset = get_dataset_with_injection("my-dataset")
     assert dataset.find_file(path) is not None
 
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", str(directory_tree)] + param)
@@ -1806,7 +1815,7 @@
 
 
 @pytest.mark.parametrize("external", [False, True])
-def test_add_existing_and_new_files(runner, client, directory_tree, external):
+def test_add_existing_and_new_files(runner, project, directory_tree, external):
     """Check adding a mix of existing and new files."""
     param = ["--external"] if external else []
 
@@ -1832,21 +1841,21 @@
     assert "OK" in result.output
 
 
-def test_add_existing_files_updates_metadata(runner, client, large_file, load_dataset_with_injection):
+def test_add_existing_files_updates_metadata(runner, project, large_file):
     """Check overwriting existing files updates their metadata."""
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", "--create", str(large_file)])
     assert result.exit_code == 0, result.output
 
     path = Path(DATA_DIR) / "my-dataset" / large_file.name
 
-    before = load_dataset_with_injection("my-dataset", client).find_file(path)
+    before = get_dataset_with_injection("my-dataset").find_file(path)
 
     time.sleep(2)
 
     large_file.write_text("New modified content.")
     assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", "--overwrite", str(large_file)]).exit_code
 
-    after = load_dataset_with_injection("my-dataset", client).find_file(path)
+    after = get_dataset_with_injection("my-dataset").find_file(path)
     assert before.id != after.id
     assert before.date_added != after.date_added
     assert before.entity.checksum != after.entity.checksum
@@ -1854,12 +1863,12 @@
     assert before.source == after.source
 
 
-def test_add_ignored_files(runner, client, directory_tree, load_dataset_with_injection):
+def test_add_ignored_files(runner, project, directory_tree):
     """Check adding/force-adding ignored files."""
     source_path = directory_tree / ".DS_Store"
     source_path.write_text("ignored-file")
-    path = project_context.path / DATA_DIR / "my-dataset" / directory_tree.name / ".DS_Store"
-    relative_path = str(path.relative_to(project_context.path))
+    path = project.path / DATA_DIR / "my-dataset" / directory_tree.name / ".DS_Store"
+    relative_path = str(path.relative_to(project.path))
 
     result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-dataset", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -1867,7 +1876,7 @@ def test_add_ignored_files(runner, project, directory_tree):
     assert str(source_path) in result.output
     assert "OK" in result.output
 
-    dataset = load_dataset_with_injection("my-dataset", client)
+    dataset = get_dataset_with_injection("my-dataset")
     assert dataset.find_file(relative_path) is None
 
@@ -1878,31 +1887,31 @@ def test_add_ignored_files(runner, project, directory_tree):
     assert str(source_path) not in result.output
     assert "OK" in result.output
 
-    dataset = load_dataset_with_injection("my-dataset", client)
+    dataset = get_dataset_with_injection("my-dataset")
     assert dataset.find_file(relative_path) is not None
 
 
-def test_add_external_files(runner, client, directory_tree, no_lfs_size_limit, load_dataset_with_injection):
+def test_add_external_files(runner, project, directory_tree, no_lfs_size_limit):
     """Check adding external files."""
     result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "--external", "my-data", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    path = project_context.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
+    path = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
     assert path.exists()
     assert path.is_symlink()
 
     external_path = directory_tree / "file1"
     assert path.resolve() == external_path
 
-    dataset = load_dataset_with_injection("my-data", client)
-    assert dataset.find_file(path.relative_to(project_context.path)) is not None
+    dataset = get_dataset_with_injection("my-data")
+    assert dataset.find_file(path.relative_to(project.path)) is not None
 
     # Symbolic links should not be tracked
-    attr_path = project_context.path / ".gitattributes"
+    attr_path = project.path / ".gitattributes"
     assert not attr_path.exists() or "file1" not in attr_path.read_text()
 
 
-def test_overwrite_external_file(runner, client, directory_tree, subdirectory):
+def test_overwrite_external_file(runner, project, directory_tree, subdirectory):
     """Check overwriting external and normal files."""
     # Add external file
     result = runner.invoke(cli, ["dataset", "add", "--create", "--external", "my-data", str(directory_tree)])
@@ -1916,23 +1925,23 @@ def test_overwrite_external_file(runner, project, directory_tree, subdirectory):
     # Can add the same file with --overwrite
     result = runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "--overwrite", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
-    pointer_files_deleted = list(project_context.pointers_path.rglob("*")) == []
+    pointer_files_deleted = list(project.pointers_path.rglob("*")) == []
     assert pointer_files_deleted
 
     # Can add the same external file
     result = runner.invoke(cli, ["dataset", "add", "--external", "my-data", "--overwrite", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
-    pointer_files_exist = len(list(project_context.pointers_path.rglob("*"))) > 0
+    pointer_files_exist = len(list(project.pointers_path.rglob("*"))) > 0
     assert pointer_files_exist
 
 
-def test_overwrite_external_file_keeps_original_content(runner, client, directory_tree):
+def test_overwrite_external_file_keeps_original_content(runner, project, directory_tree):
     """Check overwriting external files doesn't corrupt original content."""
     origin = directory_tree / "file1"
 
     assert 0 == runner.invoke(cli, ["dataset", "add", "--create", "--external", "my-data", str(origin)]).exit_code
 
-    path = project_context.path / DATA_DIR / "my-data" / "file1"
+    path = project.path / DATA_DIR / "my-data" / "file1"
     assert "file1 content" == path.read_text()
     assert path.is_symlink()
 
@@ -1946,10 +1955,10 @@
     assert "file1 content" == origin.read_text()
 
 
-def test_add_project_files_as_external(runner, repository):
+def test_add_project_files_as_external(runner, project):
     """Test adding files that are in the git repo as external files."""
     path = os.path.join(DATA_DIR, "some-data")
-    write_and_commit_file(repository, path, "some-content")
+    write_and_commit_file(project.repository, path, "some-content")
 
     result = runner.invoke(cli, ["dataset", "add", "--create", "--external", "my-data", path])
     assert 0 == result.exit_code, format_result_exception(result)
@@ -1958,31 +1967,31 @@
     assert path in result.output
 
 
-def test_remove_external_file(runner, client, directory_tree, subdirectory):
+def test_remove_external_file(runner, project, directory_tree, subdirectory):
     """Test removal of external files."""
     result = runner.invoke(cli, ["dataset", "add", "--create", "--external", "my-data", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    targets_before = {str(p.resolve()) for p in project_context.pointers_path.rglob("*")}
-    path = project_context.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
+    targets_before = {str(p.resolve()) for p in project.pointers_path.rglob("*")}
+    path = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
 
     result = runner.invoke(cli, ["rm", str(path)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    targets_after = {str(p.resolve()) for p in project_context.pointers_path.rglob("*")}
+    targets_after = {str(p.resolve()) for p in project.pointers_path.rglob("*")}
 
     removed = targets_before - targets_after
     assert 1 == len(removed)
     assert removed.pop().endswith("/file1")
 
 
-def test_unavailable_external_files(runner, client, directory_tree, subdirectory):
+def test_unavailable_external_files(runner, project, directory_tree, subdirectory):
     """Check for external files that are not available."""
     result = runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-data", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
     path = Path(DATA_DIR) / "my-data" / directory_tree.name / "file1"
-    target = (project_context.path / path).resolve()
+    target = (project.path / path).resolve()
 
     directory_tree.joinpath("file1").unlink()
     assert not path.exists()
@@ -2001,36 +2010,36 @@
 
 
 @pytest.mark.serial
-def test_external_file_update(runner, client, directory_tree, subdirectory):
+def test_external_file_update(runner, project, directory_tree, subdirectory):
     """Check updating external files."""
     result = runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-data", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
     directory_tree.joinpath("file1").write_text("some updates")
 
-    path = project_context.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
-    previous_commit = project_context.repository.get_previous_commit(path)
+    path = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
+    previous_commit = project.repository.get_previous_commit(path)
 
     result = runner.invoke(cli, ["dataset", "update", "--external", "my-data"])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    current_commit = project_context.repository.get_previous_commit(path)
+    current_commit = project.repository.get_previous_commit(path)
     assert current_commit != previous_commit
 
 
 @pytest.mark.skip("renku update follows symlinks when calculating hashes and doesn't respect external files")
 @pytest.mark.serial
-def test_workflow_with_external_file(runner, client, directory_tree, run, subdirectory, no_lfs_size_limit):
+def test_workflow_with_external_file(runner, project, directory_tree, run, subdirectory, no_lfs_size_limit):
     """Check using external files in workflows."""
     result = runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-data", str(directory_tree)])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    source = project_context.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
-    output = project_context.path / DATA_DIR / "output.txt"
+    source = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
+    output = project.path / DATA_DIR / "output.txt"
 
     assert 0 == run(args=("run", "wc", "-c"), stdin=source, stdout=output)
 
-    previous_commit = project_context.repository.get_previous_commit(output)
+    previous_commit = project.repository.get_previous_commit(output)
 
     # Update external file
     directory_tree.joinpath("file1").write_text("some updates")
@@ -2049,24 +2058,24 @@ def test_workflow_with_external_file(runner, project, directory_tree, run, subdir
     result = runner.invoke(cli, ["status"])
     assert 0 == result.exit_code, format_result_exception(result)
 
-    current_commit = project_context.repository.get_previous_commit(source)
+    current_commit = project.repository.get_previous_commit(source)
     assert current_commit != previous_commit
 
-    attributes = (project_context.path / ".gitattributes").read_text().split()
+    attributes = (project.path / ".gitattributes").read_text().split()
     assert "data/output.txt" in attributes
 
 
-def test_immutability_for_files(directory_tree, runner, client, load_dataset_with_injection):
+def test_immutability_for_files(directory_tree, runner, project):
     """Test dataset's ID changes after a change to dataset files."""
     assert 0 == runner.invoke(cli, ["dataset", "create", "my-data"]).exit_code
 
-    old_dataset = load_dataset_with_injection("my-data", client)
+    old_dataset = get_dataset_with_injection("my-data")
 
     time.sleep(1)
 
     # Add some files
     assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", str(directory_tree)]).exit_code
 
-    dataset = load_dataset_with_injection("my-data", client)
+    dataset = get_dataset_with_injection("my-data")
     assert_dataset_is_mutated(old=old_dataset, new=dataset)
     old_dataset = dataset
 
@@ -2076,7 +2085,7 @@ def test_immutability_for_files(directory_tree, runner, client, load_dataset_wit
         0
         == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "--overwrite", str(directory_tree)]).exit_code
     )
 
-    dataset = load_dataset_with_injection("my-data", client)
+    dataset = get_dataset_with_injection("my-data")
     assert_dataset_is_mutated(old=old_dataset, new=dataset)
     old_dataset = dataset
 
@@ -2084,46 +2093,46 @@ def test_immutability_for_files(directory_tree, runner, client, load_dataset_wit
     # Remove some files
     assert 0 == runner.invoke(cli, ["dataset", "unlink", "my-data", "-I", "file1", "--yes"]).exit_code
 
-    dataset = load_dataset_with_injection("my-data", client)
+    dataset = get_dataset_with_injection("my-data")
     assert_dataset_is_mutated(old=old_dataset, new=dataset)
 
 
-def test_immutability_for_adding_files_twice(directory_tree, runner, client, load_dataset_with_injection):
+def test_immutability_for_adding_files_twice(directory_tree, runner, project):
     """Test dataset's ID does not change if the same files are added again."""
     assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "--create", str(directory_tree)]).exit_code
 
-    old_dataset = load_dataset_with_injection("my-data", client)
+    old_dataset = get_dataset_with_injection("my-data")
 
     assert 1 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", str(directory_tree)]).exit_code
 
-    dataset = load_dataset_with_injection("my-data", client)
+    dataset = get_dataset_with_injection("my-data")
     assert old_dataset.id == dataset.id
 
 
-def test_immutability_after_external_update(runner, client, directory_tree, load_dataset_with_injection):
+def test_immutability_after_external_update(runner, project, directory_tree):
     """Test dataset's ID changes after updating external files."""
     assert 0 == runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-data", str(directory_tree)]).exit_code
-    old_dataset = load_dataset_with_injection("my-data", client)
+    old_dataset = get_dataset_with_injection("my-data")
 
     directory_tree.joinpath("file1").write_text("some updates")
     result = runner.invoke(cli, ["dataset", "update", "my-data"])
     assert 0 == result.exit_code, result.output
 
-    dataset = load_dataset_with_injection("my-data", client)
+    dataset = get_dataset_with_injection("my-data")
     assert_dataset_is_mutated(old=old_dataset, new=dataset)
 
 
-def test_immutability_after_no_update(runner, client, directory_tree, load_dataset_with_injection):
+def test_immutability_after_no_update(runner, project, directory_tree):
     """Test dataset's ID does not change if no external file is updated."""
     assert 0 == runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-data", str(directory_tree)]).exit_code
-    old_dataset = load_dataset_with_injection("my-data", client)
+    old_dataset = get_dataset_with_injection("my-data")
 
     assert 0 == runner.invoke(cli, ["dataset", "update", "--all"]).exit_code
 
-    dataset = load_dataset_with_injection("my-data", client)
+    dataset = get_dataset_with_injection("my-data")
     assert dataset.id == old_dataset.id
 
 
-def 
test_datasets_provenance_after_create(runner, client, get_datasets_provenance_with_injection): +def test_datasets_provenance_after_create(runner, project): """Test datasets provenance is updated after creating a dataset.""" args = [ "dataset", @@ -2144,7 +2153,7 @@ def test_datasets_provenance_after_create(runner, client, get_datasets_provenanc ] assert 0 == runner.invoke(cli, args, catch_exceptions=False).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: dataset = datasets_provenance.get_by_name("my-data") assert "Long Title" == dataset.title @@ -2160,14 +2169,14 @@ def test_datasets_provenance_after_create(runner, client, get_datasets_provenanc assert dataset.same_as is None assert [] == dataset.dataset_files - assert not project_context.repository.is_dirty(untracked_files=True) + assert not project.repository.is_dirty(untracked_files=True) -def test_datasets_provenance_after_create_when_adding(runner, client, get_datasets_provenance_with_injection): +def test_datasets_provenance_after_create_when_adding(runner, project): """Test datasets provenance is updated after creating a dataset.""" assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "--create", "my-data", "README.md"]).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: dataset = datasets_provenance.get_by_name("my-data") assert dataset.initial_identifier == dataset.identifier @@ -2175,19 +2184,17 @@ def test_datasets_provenance_after_create_when_adding(runner, client, get_datase assert dataset.same_as is None assert {"README.md"} == {Path(f.entity.path).name for f in dataset.dataset_files} - assert not project_context.repository.is_dirty(untracked_files=True) + assert not project.repository.is_dirty(untracked_files=True) -def test_datasets_provenance_after_edit( - runner, client, load_dataset_with_injection, get_datasets_provenance_with_injection -): +def test_datasets_provenance_after_edit(runner, project): """Test datasets provenance is updated after editing a dataset.""" assert 0 == runner.invoke(cli, ["dataset", "create", "my-data"]).exit_code assert 0 == runner.invoke(cli, ["dataset", "edit", "my-data", "-k", "new-data"], catch_exceptions=False).exit_code - dataset = load_dataset_with_injection("my-data", client) + dataset = get_dataset_with_injection("my-data") - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: current_version = datasets_provenance.get_by_name("my-data") old_version = datasets_provenance.get_previous_version(current_version) @@ -2198,7 +2205,7 @@ def test_datasets_provenance_after_edit( assert {"new-data"} == set(current_version.keywords) -def test_datasets_provenance_after_add(runner, client, directory_tree, get_datasets_provenance_with_injection): +def test_datasets_provenance_after_add(runner, project, directory_tree): """Test datasets provenance is updated after adding data to a dataset.""" assert ( 0 @@ -2207,12 +2214,12 @@ def test_datasets_provenance_after_add(runner, client, directory_tree, get_datas ).exit_code ) - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: dataset = datasets_provenance.get_by_name("my-data") path = os.path.join(DATA_DIR, "my-data", "file1") file = 
dataset.find_file(path) - object_hash = project_context.repository.get_object_hash(path=path) + object_hash = project.repository.get_object_hash(path=path) assert object_hash in file.entity.id assert path in file.entity.id @@ -2220,9 +2227,7 @@ def test_datasets_provenance_after_add(runner, client, directory_tree, get_datas assert path == file.entity.path -def test_datasets_provenance_after_multiple_adds( - runner, client, directory_tree, get_datasets_provenance_with_injection -): +def test_datasets_provenance_after_multiple_adds(runner, project, directory_tree): """Test datasets provenance is re-using DatasetFile objects after multiple adds.""" assert ( 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "-c", str(directory_tree / "dir1")]).exit_code @@ -2230,7 +2235,7 @@ def test_datasets_provenance_after_multiple_adds( assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", str(directory_tree / "file1")]).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: provenance = datasets_provenance.get_provenance_tails() assert 1 == len(provenance) @@ -2246,9 +2251,7 @@ def test_datasets_provenance_after_multiple_adds( assert file2.id in old_dataset_file_ids -def test_datasets_provenance_after_add_with_overwrite( - runner, client, directory_tree, get_datasets_provenance_with_injection -): +def test_datasets_provenance_after_add_with_overwrite(runner, project, directory_tree): """Test datasets provenance is updated if adding and overwriting same files.""" assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "--create", str(directory_tree)]).exit_code time.sleep(1) @@ -2256,7 +2259,7 @@ def test_datasets_provenance_after_add_with_overwrite( 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "--overwrite", str(directory_tree)]).exit_code ) - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: provenance = datasets_provenance.get_provenance_tails() assert 1 == len(provenance) @@ -2271,15 +2274,13 @@ def test_datasets_provenance_after_add_with_overwrite( assert dataset_file.id not in old_dataset_file_ids -def test_datasets_provenance_after_file_unlink( - runner, client, directory_tree, load_dataset_with_injection, get_datasets_provenance_with_injection -): +def test_datasets_provenance_after_file_unlink(runner, project, directory_tree): """Test datasets provenance is updated after removing data.""" assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "-c", str(directory_tree)]).exit_code assert 0 == runner.invoke(cli, ["dataset", "unlink", "my-data", "--include", "*/dir1/*"], input="y").exit_code - dataset = load_dataset_with_injection("my-data", client) - with get_datasets_provenance_with_injection(client) as datasets_provenance: + dataset = get_dataset_with_injection("my-data") + with get_datasets_provenance_with_injection() as datasets_provenance: current_version = datasets_provenance.get_by_name("my-data") old_version = datasets_provenance.get_by_id(Dataset.generate_id(dataset.initial_identifier)) path = os.path.join(DATA_DIR, "my-data", directory_tree.name, "file1") @@ -2293,17 +2294,15 @@ def test_datasets_provenance_after_file_unlink( assert current_version.identifier != current_version.initial_identifier -def test_datasets_provenance_after_remove( - runner, client, directory_tree, load_dataset_with_injection, 
get_datasets_provenance_with_injection -): +def test_datasets_provenance_after_remove(runner, project, directory_tree): """Test datasets provenance is updated after removing a dataset.""" assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "-c", str(directory_tree)]).exit_code - dataset = load_dataset_with_injection("my-data", client) + dataset = get_dataset_with_injection("my-data") assert 0 == runner.invoke(cli, ["dataset", "rm", "my-data"]).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: current_version = datasets_provenance.get_by_name("my-data") provenance = datasets_provenance.get_provenance_tails() @@ -2318,14 +2317,14 @@ def test_datasets_provenance_after_remove( @pytest.mark.serial -def test_datasets_provenance_after_update(runner, client, directory_tree, get_datasets_provenance_with_injection): +def test_datasets_provenance_after_update(runner, project, directory_tree): """Test datasets provenance is updated after updating a dataset.""" assert 0 == runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-data", str(directory_tree)]).exit_code directory_tree.joinpath("file1").write_text("some updates") assert 0 == runner.invoke(cli, ["dataset", "update", "--all"]).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: current_version = datasets_provenance.get_by_name("my-data") assert current_version.identifier != current_version.initial_identifier @@ -2334,17 +2333,15 @@ def test_datasets_provenance_after_update(runner, client, directory_tree, get_da assert 0 == result.exit_code, format_result_exception(result) -def test_datasets_provenance_after_adding_tag( - runner, client, get_datasets_provenance_with_injection, load_dataset_with_injection -): +def test_datasets_provenance_after_adding_tag(runner, project): """Test datasets provenance is updated after tagging a dataset.""" assert 0 == runner.invoke(cli, ["dataset", "create", "my-data"]).exit_code - old_dataset = load_dataset_with_injection("my-data", client) + old_dataset = get_dataset_with_injection("my-data") assert 0 == runner.invoke(cli, ["dataset", "tag", "my-data", "42.0"]).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: provenance = datasets_provenance.get_provenance_tails() current_version = datasets_provenance.get_by_name("my-data") @@ -2352,21 +2349,19 @@ def test_datasets_provenance_after_adding_tag( assert current_version.identifier == current_version.initial_identifier assert current_version.derived_from is None assert current_version.identifier == old_dataset.identifier - assert not project_context.repository.is_dirty(untracked_files=True) + assert not project.repository.is_dirty(untracked_files=True) -def test_datasets_provenance_after_removing_tag( - runner, client, get_datasets_provenance_with_injection, load_dataset_with_injection -): +def test_datasets_provenance_after_removing_tag(runner, project): """Test datasets provenance is updated after removing a dataset's tag.""" assert 0 == runner.invoke(cli, ["dataset", "create", "my-data"]).exit_code assert 0 == runner.invoke(cli, ["dataset", "tag", "my-data", "42.0"]).exit_code - old_dataset = load_dataset_with_injection("my-data", client) + old_dataset = get_dataset_with_injection("my-data") assert 0 == 
runner.invoke(cli, ["dataset", "rm-tags", "my-data", "42.0"]).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: provenance = datasets_provenance.get_provenance_tails() current_version = datasets_provenance.get_by_name("my-data") @@ -2374,20 +2369,18 @@ def test_datasets_provenance_after_removing_tag( assert current_version.identifier == current_version.initial_identifier assert current_version.derived_from is None assert current_version.identifier == old_dataset.identifier - assert not project_context.repository.is_dirty(untracked_files=True) + assert not project.repository.is_dirty(untracked_files=True) -def test_datasets_provenance_multiple( - runner, client, directory_tree, load_dataset_with_injection, get_datasets_provenance_with_injection -): +def test_datasets_provenance_multiple(runner, project, directory_tree): """Test datasets provenance is updated after multiple dataset operations.""" assert 0 == runner.invoke(cli, ["dataset", "create", "my-data"]).exit_code - v1 = load_dataset_with_injection("my-data", client) + v1 = get_dataset_with_injection("my-data") assert 0 == runner.invoke(cli, ["dataset", "edit", "my-data", "-k", "new-data"]).exit_code assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", str(directory_tree)]).exit_code assert 0 == runner.invoke(cli, ["dataset", "unlink", "my-data", "--include", "*/dir1/*"], input="y").exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: tail_dataset = datasets_provenance.get_by_name("my-data", immutable=True) provenance = datasets_provenance.get_provenance_tails() @@ -2404,19 +2397,19 @@ def test_datasets_provenance_multiple( assert v1.identifier == tail_dataset.initial_identifier -def test_datasets_provenance_add_file(runner, client, directory_tree, load_dataset_with_injection): +def test_datasets_provenance_add_file(runner, project, directory_tree): """Test add to dataset using graph command.""" file1 = str(directory_tree.joinpath("file1")) assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "--create", "my-data", file1]).exit_code dir1 = str(directory_tree.joinpath("dir1")) assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", dir1]).exit_code - dataset = load_dataset_with_injection("my-data", client) + dataset = get_dataset_with_injection("my-data") assert {"file1", "file2", "file3"} == {Path(f.entity.path).name for f in dataset.files} -def test_immutability_of_dataset_files(runner, client, directory_tree, load_dataset_with_injection): +def test_immutability_of_dataset_files(runner, project, directory_tree): """Test DatasetFiles are generated when their Entity changes.""" assert ( 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", "-c", str(directory_tree / "file1")]).exit_code @@ -2424,12 +2417,12 @@ def test_immutability_of_dataset_files(runner, client, directory_tree, load_data file1 = os.path.join(DATA_DIR, "my-data", "file1") - v1 = load_dataset_with_injection("my-data", client).find_file(file1) + v1 = get_dataset_with_injection("my-data").find_file(file1) # DatasetFile changes when Entity is changed - write_and_commit_file(project_context.repository, file1, "changed content", commit=False) + write_and_commit_file(project.repository, file1, "changed content", commit=False) assert 0 == runner.invoke(cli, ["dataset", "update", "--all"]).exit_code - v2 = 
load_dataset_with_injection("my-data", client).find_file(file1) + v2 = get_dataset_with_injection("my-data").find_file(file1) assert v1.id != v2.id @@ -2438,7 +2431,7 @@ def test_immutability_of_dataset_files(runner, client, directory_tree, load_data 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", str(directory_tree / "dir1" / "file2")]).exit_code ) - v3 = load_dataset_with_injection("my-data", client).find_file(file1) + v3 = get_dataset_with_injection("my-data").find_file(file1) assert v2.id == v3.id @@ -2449,13 +2442,13 @@ def test_immutability_of_dataset_files(runner, client, directory_tree, load_data cli, ["dataset", "add", "--copy", "my-data", "--overwrite", str(directory_tree / "file1")] ).exit_code ) - v4 = load_dataset_with_injection("my-data", client).find_file(file1) + v4 = get_dataset_with_injection("my-data").find_file(file1) assert v3.id != v4.id # DatasetFile changes if the file is removed assert 0 == runner.invoke(cli, ["dataset", "unlink", "my-data", "--include", "file1"], input="y").exit_code - dataset = load_dataset_with_injection("my-data", client) + dataset = get_dataset_with_injection("my-data") v5 = next(f for f in dataset.dataset_files if f.is_removed()) assert "file1" in v5.entity.path @@ -2463,7 +2456,7 @@ def test_immutability_of_dataset_files(runner, client, directory_tree, load_data @pytest.mark.serial -def test_unauthorized_import(mock_kg, client, runner): +def test_unauthorized_import(mock_kg, runner, project): """Test importing without a valid token.""" set_value("http", "renku.ch", "not-renku-token", global_only=True) @@ -2477,7 +2470,7 @@ def test_unauthorized_import(mock_kg, client, runner): @pytest.mark.serial -def test_authorized_import(mock_kg, client, runner): +def test_authorized_import(mock_kg, runner, project): """Test importing with a valid token. NOTE: Returning 404 from KG means that the request was authorized. 
We don't implement a full import due to mocking @@ -2492,8 +2485,8 @@ def test_authorized_import(mock_kg, client, runner): assert "Cannot find project in the knowledge graph" in result.output -@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/my-data"), (["--datadir", "mydir"], "mydir")]) -def test_update_local_file(runner, client, directory_tree, load_dataset_with_injection, datadir_option, datadir): +@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/my-data"), (["--datadir", "my-dir"], "my-dir")]) +def test_update_local_file(runner, project, directory_tree, datadir_option, datadir): """Check updating local files.""" assert ( 0 @@ -2504,15 +2497,15 @@ def test_update_local_file(runner, client, directory_tree, load_dataset_with_inj file1 = Path(datadir) / directory_tree.name / "file1" file1.write_text("some updates") - new_checksum_file1 = project_context.repository.get_object_hash(file1) + new_checksum_file1 = project.repository.get_object_hash(file1) file2 = Path(datadir) / directory_tree.name / "dir1" / "file2" file2.write_text("some updates") - new_checksum_file2 = project_context.repository.get_object_hash(file2) + new_checksum_file2 = project.repository.get_object_hash(file2) - commit_sha_before_update = project_context.repository.head.commit.hexsha + commit_sha_before_update = project.repository.head.commit.hexsha - old_dataset = load_dataset_with_injection("my-data", client) + old_dataset = get_dataset_with_injection("my-data") assert new_checksum_file1 != old_dataset.find_file(file1).entity.checksum assert new_checksum_file2 != old_dataset.find_file(file2).entity.checksum @@ -2525,27 +2518,25 @@ def test_update_local_file(runner, client, directory_tree, load_dataset_with_inj assert "The following files will be deleted" not in result.output assert str(file1) in result.output assert str(file2) in result.output - assert commit_sha_before_update == project_context.repository.head.commit.hexsha - assert project_context.repository.is_dirty(untracked_files=True) + assert commit_sha_before_update == project.repository.head.commit.hexsha + assert project.repository.is_dirty(untracked_files=True) result = runner.invoke(cli, ["dataset", "update", "my-data", "--no-local"]) assert 0 == result.exit_code, format_result_exception(result) - assert commit_sha_before_update == project_context.repository.head.commit.hexsha + assert commit_sha_before_update == project.repository.head.commit.hexsha result = runner.invoke(cli, ["dataset", "update", "my-data"]) assert 0 == result.exit_code, format_result_exception(result) - assert not project_context.repository.is_dirty(untracked_files=True) - dataset = load_dataset_with_injection("my-data", client) + assert not project.repository.is_dirty(untracked_files=True) + dataset = get_dataset_with_injection("my-data") assert new_checksum_file1 == dataset.find_file(file1).entity.checksum assert new_checksum_file2 == dataset.find_file(file2).entity.checksum assert_dataset_is_mutated(old=old_dataset, new=dataset) -@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/my-data"), (["--datadir", "mydir"], "mydir")]) -def test_update_local_file_in_datadir( - runner, client, directory_tree, load_dataset_with_injection, datadir_option, datadir -): +@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/my-data"), (["--datadir", "my-dir"], "my-dir")]) +def test_update_local_file_in_datadir(runner, project, directory_tree, datadir_option, datadir): """Check updating local files dropped in the 
datadir.""" assert ( 0 @@ -2561,7 +2552,7 @@ def test_update_local_file_in_datadir( file2 = folder / "another_new_file" file2.write_text("some updates") - old_dataset = load_dataset_with_injection("my-data", client) + old_dataset = get_dataset_with_injection("my-data") # NOTE: Update dry run result = runner.invoke( @@ -2574,7 +2565,7 @@ def test_update_local_file_in_datadir( assert str(file1) in result.output assert str(file2) in result.output - assert project_context.repository.is_dirty(untracked_files=True) + assert project.repository.is_dirty(untracked_files=True) result = runner.invoke( cli, ["dataset", "update", "my-data", "--check-data-directory", "--no-remote", "--no-external"] @@ -2582,22 +2573,22 @@ def test_update_local_file_in_datadir( assert 0 == result.exit_code, format_result_exception(result) - assert not project_context.repository.is_dirty(untracked_files=True) - dataset = load_dataset_with_injection("my-data", client) + assert not project.repository.is_dirty(untracked_files=True) + dataset = get_dataset_with_injection("my-data") assert dataset.find_file(file1) assert dataset.find_file(file2) assert_dataset_is_mutated(old=old_dataset, new=dataset) -def test_update_local_deleted_file(runner, client, directory_tree, load_dataset_with_injection): +def test_update_local_deleted_file(runner, project, directory_tree): """Check updating local deleted files.""" assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-data", str(directory_tree)]).exit_code file1 = Path(DATA_DIR) / "my-data" / directory_tree.name / "file1" file1.unlink() - project_context.repository.add(all=True) - project_context.repository.commit("deleted file1") - commit_sha_after_file1_delete = project_context.repository.head.commit.hexsha + project.repository.add(all=True) + project.repository.commit("deleted file1") + commit_sha_after_file1_delete = project.repository.head.commit.hexsha # NOTE: Update dry run result = runner.invoke(cli, ["dataset", "update", "--all", "--dry-run"]) @@ -2606,8 +2597,8 @@ def test_update_local_deleted_file(runner, client, directory_tree, load_dataset_ assert "The following files will be updated" not in result.output assert "The following files will be deleted" in result.output assert str(file1) in result.output - assert commit_sha_after_file1_delete == project_context.repository.head.commit.hexsha - assert not project_context.repository.is_dirty(untracked_files=True) + assert commit_sha_after_file1_delete == project.repository.head.commit.hexsha + assert not project.repository.is_dirty(untracked_files=True) # NOTE: Update without `--delete` result = runner.invoke(cli, ["dataset", "update", "my-data"]) @@ -2615,8 +2606,8 @@ def test_update_local_deleted_file(runner, client, directory_tree, load_dataset_ assert 0 == result.exit_code, format_result_exception(result) assert "Some files are deleted:" in result.output assert "Updated 0 files" in result.output - assert commit_sha_after_file1_delete == project_context.repository.head.commit.hexsha - old_dataset = load_dataset_with_injection("my-data", client) + assert commit_sha_after_file1_delete == project.repository.head.commit.hexsha + old_dataset = get_dataset_with_injection("my-data") assert old_dataset.find_file(file1) # NOTE: Update with `--delete` @@ -2624,8 +2615,8 @@ def test_update_local_deleted_file(runner, client, directory_tree, load_dataset_ assert 0 == result.exit_code, format_result_exception(result) assert "Updated 0 files and deleted 1 files" in result.output - assert commit_sha_after_file1_delete != 
project_context.repository.head.commit.hexsha - dataset = load_dataset_with_injection("my-data", client) + assert commit_sha_after_file1_delete != project.repository.head.commit.hexsha + dataset = get_dataset_with_injection("my-data") assert dataset.find_file(file1) is None assert_dataset_is_mutated(old=old_dataset, new=dataset) @@ -2635,7 +2626,7 @@ def test_update_local_deleted_file(runner, client, directory_tree, load_dataset_ assert "Updated 0 files and deleted 0 files" in result.output -def test_update_mixed_types(runner, client, directory_tree, load_dataset_with_injection): +def test_update_mixed_types(runner, project, directory_tree): """Check updating datasets with mixed local and external files.""" external_file = directory_tree / "file1" assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-data", "-e", str(external_file)]).exit_code @@ -2647,10 +2638,10 @@ def test_update_mixed_types(runner, client, directory_tree, load_dataset_with_in external_file.write_text("some external updates") file2 = Path(DATA_DIR) / "my-data" / "file2" - write_and_commit_file(project_context.repository, file2, "some updates", commit=False) - new_checksum_file2 = project_context.repository.get_object_hash(file2) + write_and_commit_file(project.repository, file2, "some updates", commit=False) + new_checksum_file2 = project.repository.get_object_hash(file2) - old_dataset = load_dataset_with_injection("my-data", client) + old_dataset = get_dataset_with_injection("my-data") result = runner.invoke(cli, ["dataset", "update", "my-data"]) @@ -2658,12 +2649,12 @@ def test_update_mixed_types(runner, client, directory_tree, load_dataset_with_in file1 = Path(DATA_DIR) / "my-data" / "file1" assert "some external updates" == file1.read_text() - dataset = load_dataset_with_injection("my-data", client) + dataset = get_dataset_with_injection("my-data") assert new_checksum_file2 == dataset.find_file(file2).entity.checksum assert_dataset_is_mutated(old=old_dataset, new=dataset) -def test_update_with_no_dataset(runner, client): +def test_update_with_no_dataset(runner, project): """Check updating a project with no dataset should not raise an error.""" result = runner.invoke(cli, ["dataset", "update", "--all"]) diff --git a/tests/cli/test_gc.py b/tests/cli/test_gc.py index 5c62b3ebf8..1a99ec0875 100644 --- a/tests/cli/test_gc.py +++ b/tests/cli/test_gc.py @@ -18,35 +18,34 @@ """Test ``gc`` command.""" from renku.core.constant import CACHE, RENKU_HOME, RENKU_TMP -from renku.domain_model.project_context import project_context from renku.ui.cli import cli from tests.utils import format_result_exception -def test_gc(runner, client): +def test_gc(runner, project): """Test clean caches and temporary files.""" # NOTE: Mock caches - tmp = project_context.path / RENKU_HOME / RENKU_TMP + tmp = project.path / RENKU_HOME / RENKU_TMP tmp.mkdir(parents=True, exist_ok=True) (tmp / "temp-file").touch() - cache = project_context.path / RENKU_HOME / CACHE + cache = project.path / RENKU_HOME / CACHE cache.mkdir(parents=True, exist_ok=True) (tmp / "cache").touch() - (project_context.path / "tracked").write_text("tracked file") - client.add("tracked") + (project.path / "tracked").write_text("tracked file") + project.repository.add("tracked") - (project_context.path / "untracked").write_text("untracked file") + (project.path / "untracked").write_text("untracked file") - commit_sha_before = client.head.commit.hexsha + commit_sha_before = project.repository.head.commit.hexsha result = runner.invoke(cli, ["gc"]) - commit_sha_after = 
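Editor's note: the `get_dataset_with_injection` and `get_datasets_provenance_with_injection` helpers used throughout the hunks above replace the old `load_dataset_with_injection`/`get_datasets_provenance_with_injection(client)` fixtures, but their implementation lives in `tests/utils.py` and is not part of this diff. A minimal, self-contained sketch of the shape such helpers can take; the `bind_gateways` context manager and the fake binding state are hypothetical stand-ins, not renku's actual code:

```python
# Hypothetical sketch -- illustrates the helper shape only, not renku's tests/utils.py.
from contextlib import contextmanager

_bindings = {}


@contextmanager
def bind_gateways():
    """Stand-in for setting up and tearing down dependency injection."""
    _bindings["datasets"] = {"my-data": {"name": "my-data"}}  # fake gateway state
    try:
        yield _bindings
    finally:
        _bindings.clear()


def get_dataset_with_injection(name):
    """Load a dataset inside a self-managed injection context.

    The old ``load_dataset_with_injection(name, client)`` needed the ``client``
    fixture; with a global project context, no explicit argument is required.
    """
    with bind_gateways() as bindings:
        return bindings["datasets"].get(name)


@contextmanager
def get_datasets_provenance_with_injection():
    """Yield a provenance-like object inside an injection context."""
    with bind_gateways() as bindings:
        yield bindings["datasets"]
```

The key design point visible in the diff is that the injection context is now created and torn down inside the helper, so call sites shrink from a fixture-plus-argument pair to a plain function call.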
diff --git a/tests/cli/test_gc.py b/tests/cli/test_gc.py
index 5c62b3ebf8..1a99ec0875 100644
--- a/tests/cli/test_gc.py
+++ b/tests/cli/test_gc.py
@@ -18,35 +18,34 @@
 """Test ``gc`` command."""

 from renku.core.constant import CACHE, RENKU_HOME, RENKU_TMP
-from renku.domain_model.project_context import project_context
 from renku.ui.cli import cli
 from tests.utils import format_result_exception


-def test_gc(runner, client):
+def test_gc(runner, project):
     """Test clean caches and temporary files."""
     # NOTE: Mock caches
-    tmp = project_context.path / RENKU_HOME / RENKU_TMP
+    tmp = project.path / RENKU_HOME / RENKU_TMP
     tmp.mkdir(parents=True, exist_ok=True)
     (tmp / "temp-file").touch()

-    cache = project_context.path / RENKU_HOME / CACHE
+    cache = project.path / RENKU_HOME / CACHE
     cache.mkdir(parents=True, exist_ok=True)
     (tmp / "cache").touch()

-    (project_context.path / "tracked").write_text("tracked file")
-    client.add("tracked")
+    (project.path / "tracked").write_text("tracked file")
+    project.repository.add("tracked")

-    (project_context.path / "untracked").write_text("untracked file")
+    (project.path / "untracked").write_text("untracked file")

-    commit_sha_before = client.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha

     result = runner.invoke(cli, ["gc"])
-    commit_sha_after = client.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha

     assert 0 == result.exit_code, format_result_exception(result)

     assert not tmp.exists()
     assert not cache.exists()

-    assert "tracked" in [f.a_path for f in client.staged_changes]
-    assert "untracked" in client.untracked_files
+    assert "tracked" in [f.a_path for f in project.repository.staged_changes]
+    assert "untracked" in project.repository.untracked_files

     assert commit_sha_after == commit_sha_before
diff --git a/tests/cli/test_gitignore.py b/tests/cli/test_gitignore.py
index 0013a0d296..b070953a2f 100644
--- a/tests/cli/test_gitignore.py
+++ b/tests/cli/test_gitignore.py
@@ -37,8 +37,8 @@ def test_dataset_add(tmpdir, runner, project, subdirectory):

     assert 1 == result.exit_code

-    project.reset(hard=True)
-    project.clean()
+    project.repository.reset(hard=True)
+    project.repository.clean()

     # Use the --force ;)
     result = runner.invoke(cli, ["dataset", "add", "--copy", "testing", "--force", ignored_file.strpath])
diff --git a/tests/cli/test_graph.py b/tests/cli/test_graph.py
index d9abb9623f..95c7635f4a 100644
--- a/tests/cli/test_graph.py
+++ b/tests/cli/test_graph.py
@@ -28,12 +28,12 @@


 @pytest.mark.parametrize("revision", ["", "HEAD", "HEAD^", "HEAD^..HEAD"])
-def test_graph_export_validation(runner, repository, directory_tree, run, revision):
+def test_graph_export_validation(runner, project, directory_tree, run, revision):
     """Test graph validation when exporting."""
     assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-data", str(directory_tree)]).exit_code

-    file1 = repository.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
-    file2 = repository.path / DATA_DIR / "my-data" / directory_tree.name / "dir1" / "file2"
+    file1 = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
+    file2 = project.path / DATA_DIR / "my-data" / directory_tree.name / "dir1" / "file2"

     assert 0 == run(["run", "head", str(file1)], stdout="out1")
     assert 0 == run(["run", "tail", str(file2)], stdout="out2")
@@ -53,7 +53,7 @@ def test_graph_export_validation(runner, repository, directory_tree, run, revisi
     assert "https://renkulab.io" in result.output

     # Make sure that nothing has changed during export which is a read-only operation
-    assert not repository.is_dirty(untracked_files=True)
+    assert not project.repository.is_dirty(untracked_files=True)


 @pytest.mark.serial
@@ -112,10 +112,10 @@ def test_graph_export_strict_dataset(tmpdir, runner, project, subdirectory):
     assert 2 == result.output.count("http://schema.org/Dataset")


-def test_graph_export_dataset_mutability(runner, project_with_datasets, client_database_injection_manager):
+def test_graph_export_dataset_mutability(runner, project_with_datasets, with_injection):
     """Test export validation fails for datasets that have both same_as and derived_from."""
-    with client_database_injection_manager(project_with_datasets):
-        with with_dataset(project_with_datasets, name="dataset-1", commit_database=True) as dataset:
+    with with_injection():
+        with with_dataset(name="dataset-1", commit_database=True) as dataset:
             # NOTE: Set both same_as and derived_from for a dataset
             dataset.same_as = Url(url_str="http://example.com")
             dataset.derived_from = Url(url_id="datasets/abc123")
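Editor's note: in `test_graph.py` above (and in the integration tests below), `client_database_injection_manager(project_with_datasets)` becomes a zero-argument `with_injection()` call. The fixture itself is not shown in this diff; a runnable toy of how a pytest fixture can hand tests a context-manager factory with that call shape, where the bind/unbind steps are placeholders rather than renku's real fixture body:

```python
# Hypothetical sketch of the fixture shape; bind/unbind steps are placeholders.
from contextlib import contextmanager

import pytest


@pytest.fixture
def with_injection():
    """Return a zero-argument context-manager factory, matching the new call shape."""

    @contextmanager
    def factory():
        # bind gateways/database to the active project context here
        try:
            yield
        finally:
            pass  # unbind here

    return factory


def test_call_shape(with_injection):
    with with_injection():  # same usage as in the hunks above
        assert True
```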
diff --git a/tests/cli/test_indirect.py b/tests/cli/test_indirect.py
index 52dcee8996..8ecb48c5eb 100644
--- a/tests/cli/test_indirect.py
+++ b/tests/cli/test_indirect.py
@@ -22,12 +22,11 @@
 import pytest

 from renku.core.util.contexts import chdir
-from renku.domain_model.project_context import project_context


-def test_indirect_inputs_outputs(renku_cli, client):
+def test_indirect_inputs_outputs(renku_cli, project):
     """Test indirect inputs/outputs that are programmatically created."""
-    with chdir(project_context.path):
+    with chdir(project.path):
         Path("foo").mkdir()
         Path(".renku/tmp").mkdir()
         Path("foo/bar").touch()
@@ -42,8 +41,8 @@ def test_indirect_inputs_outputs(renku_cli, client):
             """
         )

-        project_context.repository.add(all=True)
-        project_context.repository.commit("test setup")
+        project.repository.add(all=True)
+        project.repository.commit("test setup")

     exit_code, activity = renku_cli("run", "sh", "-c", "sh script.sh")
@@ -61,9 +60,9 @@ def test_indirect_inputs_outputs(renku_cli, client):
     assert "qux" == plan.outputs[0].default_value


-def test_duplicate_indirect_inputs(renku_cli, client):
+def test_duplicate_indirect_inputs(renku_cli, project):
     """Test duplicate indirect inputs are only included once."""
-    with chdir(project_context.path):
+    with chdir(project.path):
         Path("foo").mkdir()
         Path(".renku/tmp").mkdir()
         Path("foo/bar").touch()
@@ -80,8 +79,8 @@ def test_duplicate_indirect_inputs(renku_cli, client):
             """
         )

-        project_context.repository.add(all=True)
-        project_context.repository.commit("test setup")
+        project.repository.add(all=True)
+        project.repository.commit("test setup")

     exit_code, activity = renku_cli("run", "--no-output", "sh", "-c", "sh script.sh", "baz")
@@ -89,9 +88,9 @@ def test_duplicate_indirect_inputs(renku_cli, client):
     assert {"baz", "foo/bar"} == {i.default_value for i in activity.association.plan.inputs}


-def test_duplicate_indirect_outputs(renku_cli, client):
+def test_duplicate_indirect_outputs(renku_cli, project):
     """Test duplicate indirect outputs are only included once."""
-    with chdir(project_context.path):
+    with chdir(project.path):
         Path("foo").mkdir()
         Path(".renku/tmp").mkdir()
         Path("foo/bar").touch()
@@ -109,8 +108,8 @@ def test_duplicate_indirect_outputs(renku_cli, client):
             """
         )

-        project_context.repository.add(all=True)
-        project_context.repository.commit("test setup")
+        project.repository.add(all=True)
+        project.repository.commit("test setup")

     exit_code, activity = renku_cli("run", "sh", "-c", "sh script.sh")
@@ -118,9 +117,9 @@ def test_duplicate_indirect_outputs(renku_cli, client):
     assert {"baz", "foo/bar"} == {o.default_value for o in activity.association.plan.outputs}


-def test_indirect_parameters(renku_cli, client):
+def test_indirect_parameters(renku_cli, project):
     """Test indirect parameters."""
-    with chdir(project_context.path):
+    with chdir(project.path):
         Path(".renku/tmp").mkdir()

         Path("script.sh").write_text(
@@ -132,8 +131,8 @@ def test_indirect_parameters(renku_cli, client):
             """
         )

-        project_context.repository.add(all=True)
-        project_context.repository.commit("test setup")
+        project.repository.add(all=True)
+        project.repository.commit("test setup")

     exit_code, activity = renku_cli("run", "--no-output", "sh", "-c", "sh script.sh")
@@ -147,9 +146,9 @@ def test_indirect_parameters(renku_cli, client):


 @pytest.mark.skip("renku update is not implemented with new database, reenable once it is.")
-def test_indirect_parameters_update(renku_cli, client):
+def test_indirect_parameters_update(renku_cli, project):
     """Test updating of indirect parameters."""
-    with chdir(project_context.path):
+    with chdir(project.path):
         Path(".renku/tmp").mkdir(exist_ok=True)

         Path("script.sh").write_text(
@@ -160,12 +159,12 @@ def test_indirect_parameters_update(renku_cli, client):
             """
         )

-        project_context.repository.add(all=True)
-        project_context.repository.commit("test setup")
+        project.repository.add(all=True)
+        project.repository.commit("test setup")

     renku_cli("run", "sh", "script.sh", stdout="result")

-    with chdir(project_context.path):
+    with chdir(project.path):
         Path(".renku/tmp").mkdir(exist_ok=True)

         Path("script.sh").write_text(
@@ -175,8 +174,8 @@ def test_indirect_parameters_update(renku_cli, client):
             """
         )

-        project_context.repository.add(all=True)
-        project_context.repository.commit("test setup")
+        project.repository.add(all=True)
+        project.repository.commit("test setup")

     exit_code, activity = renku_cli("update", "--all")
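Editor's note: across all of these files, the global `project_context` import and the old `client` fixture collapse into a single `project` fixture exposing `.path` and `.repository`. That fixture's implementation is outside this diff; a runnable toy with the same interface, built on pytest's `tmp_path` and plain `git` via subprocess (names are illustrative, and renku's real fixture does far more, e.g. initializing project metadata):

```python
# Hypothetical sketch of a ``project`` fixture exposing ``.path`` and ``.repository``.
import subprocess
from dataclasses import dataclass
from pathlib import Path

import pytest


@dataclass
class ToyRepository:
    path: Path

    def add(self, *paths, all=False):
        args = ["git", "-C", str(self.path), "add"]
        args += ["--all"] if all else [str(p) for p in paths]
        subprocess.run(args, check=True)

    def commit(self, message):
        subprocess.run(["git", "-C", str(self.path), "commit", "-m", message], check=True)


@dataclass
class ToyProject:
    path: Path

    @property
    def repository(self):
        return ToyRepository(self.path)


@pytest.fixture
def project(tmp_path):
    """Initialize a git repository and wrap it in a project-like object."""
    subprocess.run(["git", "init", str(tmp_path)], check=True)
    return ToyProject(tmp_path)
```

Tests written against this interface read the same as the updated hunks: `project.path / "file"` for filesystem access and `project.repository.add(...)` / `project.repository.commit(...)` for git operations.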
load_dataset_with_injection): +def test_dataset_import_and_extract(runner, project, sleep_after): """Test dataset import and extract files.""" url = "https://zenodo.org/record/2658634" result = runner.invoke(cli, ["dataset", "import", "--extract", "--short-name", "remote", url], input="y") assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - dataset = load_dataset_with_injection("remote", project) + dataset = get_dataset_with_injection("remote") extracted_file = "data/remote/quantling-pyndl-c34259c/doc/make.bat" assert dataset.find_file(extracted_file) @@ -254,7 +256,7 @@ def test_dataset_import_and_extract(runner, project, sleep_after, load_dataset_w @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_dataset_import_different_names(runner, client, sleep_after): +def test_dataset_import_different_names(runner, project, sleep_after): """Test can import same DOI under different names.""" doi = "10.5281/zenodo.2658634" result = runner.invoke(cli, ["dataset", "import", "--short-name", "name-1", doi], input="y") @@ -319,7 +321,7 @@ def test_dataset_import_preserve_names(runner, project, sleep_after): "https://dev.renku.ch/projects/renku-test-projects/dataset-import/datasets/remote-dataset/", ], ) -def test_dataset_import_renku_provider(runner, client, uri, load_dataset_with_injection): +def test_dataset_import_renku_provider(runner, project, uri): """Test dataset import from Renku datasets.""" result = runner.invoke(cli, ["dataset", "import", "--name", "my-dataset", uri], input="y") @@ -328,7 +330,7 @@ def test_dataset_import_renku_provider(runner, client, uri, load_dataset_with_in assert "business-employment-data-december-2020-quarter-csv.zip" in result.output assert "OK" in result.output - dataset = load_dataset_with_injection("my-dataset", client) + dataset = get_dataset_with_injection("my-dataset") assert "business-employment-data-december-2020-quarter-csv.zip" in [Path(f.entity.path).name for f in dataset.files] # NOTE: Check that schema:sameAs is always set to canonical dataset URI regardless of import URI @@ -351,13 +353,13 @@ def test_dataset_import_renku_provider(runner, client, uri, load_dataset_with_in "remote-dataset", ], ) -def test_dataset_import_renku_provider_with_subgroups(runner, client, uri, load_dataset_with_injection): +def test_dataset_import_renku_provider_with_subgroups(runner, project, uri): """Test dataset import from Renku datasets in projects within subgroups.""" result = runner.invoke(cli, ["dataset", "import", "--name", "my-dataset", uri], input="y") assert 0 == result.exit_code, format_result_exception(result) - dataset = load_dataset_with_injection("my-dataset", client) + dataset = get_dataset_with_injection("my-dataset") assert "business-employment-data-december-2020-quarter-csv.zip" in [Path(f.entity.path).name for f in dataset.files] @@ -370,7 +372,7 @@ def test_dataset_import_renku_provider_with_subgroups(runner, client, uri, load_ @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_dataset_import_renkulab_dataset_with_image(runner, project, client, client_database_injection_manager): +def test_dataset_import_renkulab_dataset_with_image(runner, project, with_injection): """Test dataset import from Renkulab projects.""" result = runner.invoke( cli, ["dataset", "import", "https://dev.renku.ch/datasets/4f36f891bb7c4b2bab137633cc270a40"], input="y" @@ -386,7 +388,7 @@ def test_dataset_import_renkulab_dataset_with_image(runner, project, client, cli assert 0 == result.exit_code, 
format_result_exception(result) assert "bla" in result.output - with client_database_injection_manager(client): + with with_injection(): dataset = [d for d in DatasetGateway().get_all_active_datasets()][0] assert 2 == len(dataset.images) img1 = next((i for i in dataset.images if i.position == 1)) @@ -394,7 +396,7 @@ def test_dataset_import_renkulab_dataset_with_image(runner, project, client, cli assert img1.content_url == "https://example.com/image1.jpg" assert img2.content_url.endswith("/2.png") - assert os.path.exists(project_context.path / img2.content_url) + assert os.path.exists(project.path / img2.content_url) @pytest.mark.integration @@ -403,14 +405,12 @@ def test_dataset_import_renkulab_dataset_with_image(runner, project, client, cli @pytest.mark.parametrize( "datadir_option,expected_datadir", [([], Path(DATA_DIR) / "remote"), (["--datadir", "mydir"], Path("mydir"))] ) -def test_import_renku_dataset_preserves_directory_hierarchy( - runner, project, load_dataset_with_injection, datadir_option, expected_datadir -): +def test_import_renku_dataset_preserves_directory_hierarchy(runner, project, datadir_option, expected_datadir): """Test dataset imported from Renku projects have correct directory hierarchy.""" url = "https://dev.renku.ch/datasets/1a637fd1a7a64d1fb9aa157e7033cd1c" assert 0 == runner.invoke(cli, ["dataset", "import", "--yes", "--name", "remote", url] + datadir_option).exit_code - dataset = load_dataset_with_injection("remote", project) + dataset = get_dataset_with_injection("remote") paths = ["README.md", os.path.join("python", "data", "README.md"), os.path.join("r", "data", "README.md")] data_dir = Path(dataset.get_datadir()) @@ -426,7 +426,7 @@ def test_import_renku_dataset_preserves_directory_hierarchy( @retry_failed @pytest.mark.vcr @pytest.mark.parametrize("url", ["https://dev.renku.ch/datasets/e3e1beba05594fdd8e4682963cec9fe2"]) -def test_dataset_import_renku_fail(runner, client, monkeypatch, url): +def test_dataset_import_renku_fail(runner, project, monkeypatch, url): """Test dataset import fails if cannot clone repo.""" from renku.core.dataset.providers import renku @@ -446,7 +446,7 @@ def clone_renku_repository_mock(*_, **__): @retry_failed @pytest.mark.vcr @pytest.mark.parametrize("url", ["https://dev.renku.ch/datasets/e3e1beba-0559-4fdd-8e46-82963cec9fe2"]) -def test_dataset_import_renku_missing_project(runner, client, missing_kg_project_responses, url): +def test_dataset_import_renku_missing_project(runner, project, missing_kg_project_responses, url): """Test dataset import fails if cannot find project in KG.""" result = runner.invoke(cli, ["dataset", "import", url], input="y") assert 1 == result.exit_code @@ -539,7 +539,7 @@ def test_renku_dataset_import_missing_lfs_objects(runner, project): def test_dataset_export_upload_file( runner, tmpdir, - client, + project, zenodo_sandbox, dataverse_demo, olos_sandbox, @@ -547,7 +547,7 @@ def test_dataset_export_upload_file( params, output, input, - client_database_injection_manager, + with_injection, ): """Test successful uploading of a file to Zenodo/Dataverse deposit.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -563,13 +563,13 @@ def test_dataset_export_upload_file( result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", str(new_file)]) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - with client_database_injection_manager(client): - with with_dataset(client, name="my-dataset", commit_database=True) as dataset: + with 
with_injection(): + with with_dataset(name="my-dataset", commit_database=True) as dataset: dataset.description = "awesome dataset" dataset.creators[0].affiliation = "eth" - project_context.repository.add(all=True) - project_context.repository.commit("metadata updated") + project.repository.add(all=True) + project.repository.commit("metadata updated") result = runner.invoke( cli, ["dataset", "export", "my-dataset", provider] + params, input=input, catch_exceptions=False @@ -597,7 +597,7 @@ def test_dataset_export_upload_file( def test_dataset_export_upload_tag( runner, tmpdir, - client, + project, zenodo_sandbox, dataverse_demo, olos_sandbox, @@ -605,7 +605,7 @@ def test_dataset_export_upload_tag( params, output, input, - client_database_injection_manager, + with_injection, ): """Test successful uploading of a file to Zenodo/Dataverse deposit.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -620,13 +620,13 @@ def test_dataset_export_upload_tag( result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", str(new_file)]) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - with client_database_injection_manager(client): - with with_dataset(client, name="my-dataset", commit_database=True) as dataset: + with with_injection(): + with with_dataset(name="my-dataset", commit_database=True) as dataset: dataset.description = "awesome dataset" dataset.creators[0].affiliation = "eth" - project_context.repository.add(all=True) - project_context.repository.commit("metadata updated") + project.repository.add(all=True) + project.repository.commit("metadata updated") # tag dataset result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "1.0"]) @@ -722,7 +722,7 @@ def test_dataset_export_to_local(runner, tmp_path): def test_dataset_export_upload_multiple( runner, tmpdir, - client, + project, zenodo_sandbox, dataverse_demo, olos_sandbox, @@ -730,7 +730,7 @@ def test_dataset_export_upload_multiple( params, output, input, - client_database_injection_manager, + with_injection, ): """Test successful uploading of a files to Zenodo deposit.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -749,13 +749,13 @@ def test_dataset_export_upload_multiple( result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset"] + paths, catch_exceptions=False) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - with client_database_injection_manager(client): - with with_dataset(client, name="my-dataset", commit_database=True) as dataset: + with with_injection(): + with with_dataset(name="my-dataset", commit_database=True) as dataset: dataset.description = "awesome dataset" dataset.creators[0].affiliation = "eth" - project_context.repository.add(all=True) - project_context.repository.commit("metadata updated") + project.repository.add(all=True) + project.repository.commit("metadata updated") result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input=input) @@ -767,7 +767,7 @@ def test_dataset_export_upload_multiple( @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_dataset_export_upload_failure(runner, tmpdir, client, zenodo_sandbox): +def test_dataset_export_upload_failure(runner, tmpdir, project, zenodo_sandbox): """Test failed uploading of a file to Zenodo deposit.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -796,7 +796,9 @@ def test_dataset_export_upload_failure(runner, tmpdir, client, zenodo_sandbox): 
"provider,params,output", [("zenodo", [], "zenodo.org/record"), ("dataverse", ["--dataverse-name", "sdsc-published-test-dataverse"], "doi:")], ) -def test_dataset_export_published_url(runner, tmpdir, client, zenodo_sandbox, dataverse_demo, provider, params, output): +def test_dataset_export_published_url( + runner, tmpdir, project, zenodo_sandbox, dataverse_demo, provider, params, output +): """Test publishing of dataset.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -811,12 +813,12 @@ def test_dataset_export_published_url(runner, tmpdir, client, zenodo_sandbox, da result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset", str(new_file)]) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - with with_dataset(client, name="my-dataset", commit_database=True) as dataset: + with with_dataset(name="my-dataset", commit_database=True) as dataset: dataset.description = "awesome dataset" dataset.creators[0].affiliation = "eth" - project_context.repository.add(all=True) - project_context.repository.commit("metadata updated") + project.repository.add(all=True) + project.repository.commit("metadata updated") result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "--publish"] + params) @@ -828,7 +830,7 @@ def test_dataset_export_published_url(runner, tmpdir, client, zenodo_sandbox, da @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_export_dataset_wrong_provider(runner, project, tmpdir, client): +def test_export_dataset_wrong_provider(runner, project, tmpdir): """Test non-existing provider.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -850,7 +852,7 @@ def test_export_dataset_wrong_provider(runner, project, tmpdir, client): @pytest.mark.integration @pytest.mark.parametrize("provider", ["zenodo", "dataverse", "local", "olos"]) -def test_dataset_export_non_existing(runner, client, project, provider): +def test_dataset_export_non_existing(runner, project, provider): """Check dataset not found exception raised.""" result = runner.invoke(cli, ["dataset", "export", "non-existing", provider]) @@ -870,12 +872,12 @@ def test_dataset_export_non_existing(runner, client, project, provider): ], ) def test_export_dataset_unauthorized( - runner, project, client, tmpdir, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params + runner, project, tmpdir, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params ): """Test unauthorized exception raised.""" set_value(provider, "access_token", "not-a-token") - project_context.repository.add(".renku/renku.ini") - project_context.repository.commit("update renku.ini") + project.repository.add(".renku/renku.ini") + project.repository.commit("update renku.ini") result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) @@ -901,7 +903,7 @@ def test_export_dataset_unauthorized( @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_export_dataverse_no_dataverse_name(runner, project, client, dataverse_demo): +def test_export_dataverse_no_dataverse_name(runner, project, dataverse_demo): """Test export without providing a dataverse name.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) @@ -915,11 +917,11 @@ def test_export_dataverse_no_dataverse_name(runner, project, client, dataverse_d @pytest.mark.integration @retry_failed 
@pytest.mark.vcr -def test_export_dataverse_no_dataverse_url(runner, client, dataverse_demo): +def test_export_dataverse_no_dataverse_url(runner, project, dataverse_demo): """Test export without providing a dataverse server url.""" remove_value("dataverse", "server_url") - project_context.repository.add(".renku/renku.ini") - project_context.repository.commit("renku.ini") + project.repository.add(".renku/renku.ini") + project.repository.commit("renku.ini") result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) @@ -936,7 +938,7 @@ def test_export_dataverse_no_dataverse_url(runner, client, dataverse_demo): @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_export_imported_dataset_to_dataverse(runner, client, dataverse_demo, zenodo_sandbox): +def test_export_imported_dataset_to_dataverse(runner, project, dataverse_demo, zenodo_sandbox): """Test exporting an imported Zenodo dataset to dataverse.""" result = runner.invoke(cli, ["dataset", "import", "10.5281/zenodo.2658634", "--short-name", "my-data"], input="y") assert 0 == result.exit_code, format_result_exception(result) @@ -951,7 +953,7 @@ def test_export_imported_dataset_to_dataverse(runner, client, dataverse_demo, ze @pytest.mark.integration @pytest.mark.vcr -def test_add_from_url_to_destination(runner, project, load_dataset_with_injection): +def test_add_from_url_to_destination(runner, project): """Test add data from a URL to a new destination.""" url = "https://raw.githubusercontent.com/SwissDataScienceCenter/renku-python/master/docs/Makefile" assert 0 == runner.invoke(cli, ["dataset", "create", "remote"], catch_exceptions=False).exit_code @@ -960,10 +962,10 @@ def test_add_from_url_to_destination(runner, project, load_dataset_with_injectio assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) relative_path = os.path.join("data", "remote", "new-name") - assert (project_context.path / relative_path).exists() - assert (project_context.path / relative_path).is_file() + assert (project.path / relative_path).exists() + assert (project.path / relative_path).is_file() - dataset = load_dataset_with_injection("remote", project) + dataset = get_dataset_with_injection("remote") assert dataset.find_file(relative_path) is not None @@ -981,7 +983,7 @@ def test_add_from_url_to_destination(runner, project, load_dataset_with_injectio ], ) @pytest.mark.vcr -def test_add_from_git_to_new_path(runner, client, params, path, load_dataset_with_injection): +def test_add_from_git_to_new_path(runner, project, params, path): """Test add data from a git repository with no destination or to a non-existing destination.""" remote = "https://github.com/SwissDataScienceCenter/renku-jupyter.git" assert 0 == runner.invoke(cli, ["dataset", "create", "remote"], catch_exceptions=False).exit_code @@ -991,7 +993,7 @@ def test_add_from_git_to_new_path(runner, client, params, path, load_dataset_wit assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) assert Path(path).exists() - file = load_dataset_with_injection("remote", client).find_file(path) + file = get_dataset_with_injection("remote").find_file(path) assert file.source == remote assert file.based_on.url == remote @@ -1009,23 +1011,19 @@ def test_add_from_git_to_new_path(runner, client, params, path, load_dataset_wit ], ) @pytest.mark.vcr -def test_add_from_git_to_existing_path( - runner, client, params, path, load_dataset_with_injection, 
no_datadir_commit_warning
-):
+def test_add_from_git_to_existing_path(runner, project, params, path, no_datadir_commit_warning):
    """Test add data to datasets from a git repository to an existing path."""
    remote = "https://github.com/SwissDataScienceCenter/renku-jupyter.git"
    assert 0 == runner.invoke(cli, ["dataset", "create", "remote"], catch_exceptions=False).exit_code

-    write_and_commit_file(
-        project_context.repository, project_context.path / "data" / "remote" / "existing" / ".gitkeep", ""
-    )
+    write_and_commit_file(project.repository, project.path / "data" / "remote" / "existing" / ".gitkeep", "")

    result = runner.invoke(cli, ["dataset", "add", "--copy", "remote", "--ref", "0.3.0", remote] + params)

    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)
    assert Path(path).exists()

-    file = load_dataset_with_injection("remote", client).find_file(path)
+    file = get_dataset_with_injection("remote").find_file(path)
    assert file.source == remote
    assert file.based_on.url == remote

@@ -1046,7 +1044,7 @@ def test_add_from_git_to_existing_path(
    ],
)
@pytest.mark.vcr
-def test_add_from_git_with_wildcards_to_new_path(runner, client, params, files, load_dataset_with_injection):
+def test_add_from_git_with_wildcards_to_new_path(runner, project, params, files):
    """Test add data from a git repository using wildcards to a non-existing destination."""
    remote = "https://github.com/SwissDataScienceCenter/renku-jupyter.git"

@@ -1070,7 +1068,7 @@
    ],
)
@pytest.mark.vcr
-def test_add_from_git_with_wildcards_to_existing_path(runner, client, params, files, load_dataset_with_injection):
+def test_add_from_git_with_wildcards_to_existing_path(runner, project, params, files):
    """Test add data from a git repository using wildcards to an existing destination."""
    remote = "https://github.com/SwissDataScienceCenter/renku-jupyter.git"

@@ -1085,7 +1083,7 @@

@pytest.mark.integration
@retry_failed
-def test_add_data_in_multiple_places_from_git(runner, client, load_dataset_with_injection):
+def test_add_data_in_multiple_places_from_git(runner, project):
    """Test add same data to datasets in multiple places from a git repository."""
    url = "https://github.com/SwissDataScienceCenter/renku-jupyter.git"

@@ -1094,13 +1092,13 @@
    args = ["dataset", "add", "--copy", "remote", "--ref", "0.3.0"]
    assert 0 == runner.invoke(cli, args + ["-s", "docker/base/Dockerfile", url]).exit_code

-    dataset = load_dataset_with_injection("remote", client)
+    dataset = get_dataset_with_injection("remote")
    data_dir = Path(dataset.get_datadir())

    based_on_id = dataset.find_file(data_dir / "Dockerfile").based_on.id

    assert 0 == runner.invoke(cli, args + ["-s", "docker", url]).exit_code

-    dataset = load_dataset_with_injection("remote", client)
+    dataset = get_dataset_with_injection("remote")
    assert based_on_id == dataset.find_file(data_dir / "Dockerfile").based_on.id
    assert based_on_id == dataset.find_file(data_dir / "docker" / "base" / "Dockerfile").based_on.id

@@ -1121,7 +1119,7 @@
)
@retry_failed
@pytest.mark.vcr
-def test_usage_error_in_add_from_git(runner, client, params, n_urls, message):
+def test_usage_error_in_add_from_git(runner, project, params, n_urls, message):
    """Test 
usage errors when adding to a dataset from a git repository."""
    remote = "https://github.com/SwissDataScienceCenter/renku-jupyter.git"

@@ -1146,7 +1144,7 @@ def test_usage_error_in_add_from_git(runner, client, params, n_urls, message):
@pytest.mark.parametrize("params", [["--all"], ["-I", "README.md"], ["-I", "R*"], ["remote"]])
@retry_failed
@pytest.mark.vcr
-def test_dataset_update(client, runner, params, load_dataset_with_injection):
+def test_dataset_update(project, runner, params):
    """Test local copy is updated when remote file is updated."""
    url = "https://github.com/SwissDataScienceCenter/renku-jupyter.git"

@@ -1156,11 +1154,11 @@ def test_dataset_update(client, runner, params, load_dataset_with_injection):
    )
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    before = load_dataset_with_injection("remote", client).find_file("data/remote/README.md")
+    before = get_dataset_with_injection("remote").find_file("data/remote/README.md")

    assert 0 == runner.invoke(cli, ["dataset", "update"] + params, catch_exceptions=False).exit_code

-    after = load_dataset_with_injection("remote", client).find_file("data/remote/README.md")
+    after = get_dataset_with_injection("remote").find_file("data/remote/README.md")
    assert after.id != before.id
    assert after.date_added != before.date_added
@@ -1178,28 +1176,28 @@ def test_dataset_update(client, runner, params, load_dataset_with_injection):
@pytest.mark.parametrize("doi", ["10.5281/zenodo.2658634"])
@retry_failed
@pytest.mark.vcr
-def test_dataset_update_zenodo(client, runner, doi, load_dataset_with_injection):
+def test_dataset_update_zenodo(project, runner, doi):
    """Test updating datasets from external providers."""
    result = runner.invoke(
        cli, ["dataset", "import", "--short-name", "imported_dataset", doi], input="y", catch_exceptions=False
    )
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    commit_sha_after_file1_delete = project_context.repository.head.commit.hexsha
+    commit_sha_after_file1_delete = project.repository.head.commit.hexsha

-    before_dataset = load_dataset_with_injection("imported_dataset", client)
+    before_dataset = get_dataset_with_injection("imported_dataset")

    result = runner.invoke(cli, ["dataset", "update", "--all", "--dry-run"])

    assert 0 == result.exit_code, format_result_exception(result)
    assert "The following imported datasets will be updated" in result.output
    assert "imported_dataset" in result.output
-    assert commit_sha_after_file1_delete == project_context.repository.head.commit.hexsha
-    assert not project_context.repository.is_dirty(untracked_files=True)
+    assert commit_sha_after_file1_delete == project.repository.head.commit.hexsha
+    assert not project.repository.is_dirty(untracked_files=True)

    result = runner.invoke(cli, ["dataset", "update", "imported_dataset"], catch_exceptions=False)
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    after_dataset = load_dataset_with_injection("imported_dataset", client)
+    after_dataset = get_dataset_with_injection("imported_dataset")
    assert after_dataset.version != before_dataset.version
    assert after_dataset.id != before_dataset.id
    assert after_dataset.derived_from is None
@@ -1213,7 +1211,7 @@
@pytest.mark.parametrize("doi", ["10.7910/DVN/F4NUMR"])
@retry_failed
@pytest.mark.vcr
-def test_dataset_update_dataverse(client, runner, doi, load_dataset_with_injection, 
client_database_injection_manager):
+def test_dataset_update_dataverse(project, runner, doi, with_injection):
    """Test updating datasets from external providers.

    Since dataverse does not have DOIs/IDs for each version, we need to fake the check.
@@ -1225,19 +1223,19 @@
    assert 0 == runner.invoke(cli, ["dataset", "rm-tags", "imported_dataset", "2.2"], catch_exceptions=False).exit_code

-    with client_database_injection_manager(client):
-        with with_dataset(client, name="imported_dataset", commit_database=True) as dataset:
+    with with_injection():
+        with with_dataset(name="imported_dataset", commit_database=True) as dataset:
            dataset.version = "0.1"

-    project_context.repository.add(all=True)
-    project_context.repository.commit("metadata updated")
+    project.repository.add(all=True)
+    project.repository.commit("metadata updated")

-    before_dataset = load_dataset_with_injection("imported_dataset", client)
+    before_dataset = get_dataset_with_injection("imported_dataset")

    result = runner.invoke(cli, ["dataset", "update", "imported_dataset"], catch_exceptions=False)
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    after_dataset = load_dataset_with_injection("imported_dataset", client)
+    after_dataset = get_dataset_with_injection("imported_dataset")
    assert after_dataset.version != before_dataset.version
    assert after_dataset.id != before_dataset.id
    assert after_dataset.derived_from is None
@@ -1250,36 +1248,36 @@
@pytest.mark.integration
@retry_failed
@pytest.mark.vcr
-def test_dataset_update_renku(client, runner, load_dataset_with_injection, client_database_injection_manager):
+def test_dataset_update_renku(project, runner, with_injection):
    """Test updating datasets from renku provider."""
    uri = "https://dev.renku.ch/datasets/860f6b5b46364c83b6a9b38ef198bcc0"
    assert 0 == runner.invoke(cli, ["dataset", "import", "--name", "remote-dataset", uri], input="y").exit_code

-    with client_database_injection_manager(client):
-        with with_dataset(client, name="remote-dataset", commit_database=True) as dataset:
+    with with_injection():
+        with with_dataset(name="remote-dataset", commit_database=True) as dataset:
            # NOTE: To mock an update we set schema:sameAs to a dataset that has an update
            update_uri = "https://dev.renku.ch/datasets/04b463b01b514833b236186a941f6259"
            dataset.same_as = Url(url_id=update_uri)

-    project_context.repository.add(all=True)
-    project_context.repository.commit("metadata updated")
+    project.repository.add(all=True)
+    project.repository.commit("metadata updated")

-    commit_sha_after_file1_delete = project_context.repository.head.commit.hexsha
+    commit_sha_after_file1_delete = project.repository.head.commit.hexsha

-    before_dataset = load_dataset_with_injection("imported_dataset", client)
+    before_dataset = get_dataset_with_injection("remote-dataset")

    result = runner.invoke(cli, ["dataset", "update", "--all", "--dry-run"])

    assert 0 == result.exit_code, format_result_exception(result)
    assert "The following imported datasets will be updated" in result.output
    assert "remote-dataset" in result.output
-    assert commit_sha_after_file1_delete == project_context.repository.head.commit.hexsha
-    assert not project_context.repository.is_dirty(untracked_files=True)
+    assert commit_sha_after_file1_delete == project.repository.head.commit.hexsha
+    assert not project.repository.is_dirty(untracked_files=True)

    result = 
runner.invoke(cli, ["dataset", "update", "--all"]) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - after_dataset = load_dataset_with_injection("remote-dataset", client) + after_dataset = get_dataset_with_injection("remote-dataset") assert after_dataset.id != before_dataset.id assert after_dataset.derived_from is None latest_uri = "https://dev.renku.ch/datasets/e55070d995b34b9ba319c6e66f883f00" @@ -1293,7 +1291,7 @@ def test_dataset_update_renku(client, runner, load_dataset_with_injection, clien @pytest.mark.integration @retry_failed -def test_dataset_update_remove_file(client, runner): +def test_dataset_update_remove_file(project, runner): """Test local copy is removed when remote file is removed.""" # Add dataset to project result = runner.invoke( @@ -1312,7 +1310,7 @@ def test_dataset_update_remove_file(client, runner): catch_exceptions=False, ) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - file_path = project_context.path / DATA_DIR / "remote" / "authors.rst" + file_path = project.path / DATA_DIR / "remote" / "authors.rst" assert file_path.exists() # docs/authors.rst does not exists in v0.5.0 @@ -1331,7 +1329,7 @@ def test_dataset_update_remove_file(client, runner): @pytest.mark.parametrize("params", [["-I", "non-existing"], ["non-existing-dataset"]]) @retry_failed @pytest.mark.vcr -def test_dataset_invalid_update(client, runner, params): +def test_dataset_invalid_update(project, runner, params): """Test updating a non-existing path.""" # Add dataset to project result = runner.invoke( @@ -1359,10 +1357,10 @@ def test_dataset_invalid_update(client, runner, params): @pytest.mark.parametrize("params", [["--all"], ["-I", "CHANGES.rst"], ["-I", "CH*"], ["dataset-1", "dataset-2"]]) @retry_failed @pytest.mark.vcr -def test_dataset_update_multiple_datasets(client, runner, data_repository, params): +def test_dataset_update_multiple_datasets(project, runner, data_repository, params): """Test update with multiple datasets.""" - path1 = project_context.path / DATA_DIR / "dataset-1" / "CHANGES.rst" - path2 = project_context.path / DATA_DIR / "dataset-2" / "CHANGES.rst" + path1 = project.path / DATA_DIR / "dataset-1" / "CHANGES.rst" + path2 = project.path / DATA_DIR / "dataset-2" / "CHANGES.rst" # Add dataset to project result = runner.invoke( cli, @@ -1409,7 +1407,7 @@ def test_dataset_update_multiple_datasets(client, runner, data_repository, param @pytest.mark.integration @retry_failed -def test_empty_update(client, runner, data_repository): +def test_empty_update(project, runner, data_repository): """Test update when nothing changed does not create a commit.""" # Add dataset to project result = runner.invoke( @@ -1429,30 +1427,32 @@ def test_empty_update(client, runner, data_repository): ) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - commit_sha_before = project_context.repository.head.commit.hexsha + commit_sha_before = project.repository.head.commit.hexsha result = runner.invoke(cli, ["dataset", "update", "--ref", "0.3.0", "--all"], catch_exceptions=False) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - commit_sha_after = project_context.repository.head.commit.hexsha + commit_sha_after = project.repository.head.commit.hexsha assert commit_sha_after == commit_sha_before @pytest.mark.integration @retry_failed -def test_import_from_renku_project(tmpdir, client, runner, load_dataset_with_injection): +def 
test_import_from_renku_project(tmpdir, project, runner):
    """Check metadata for an imported dataset from another renkulab repo."""
+    from renku.domain_model.project_context import project_context
+
    url = "https://dev.renku.ch/gitlab/renku-testing/project-9.git"

-    path = tmpdir.mkdir("remote_repo")
-    os.environ["GIT_LFS_SKIP_SMUDGE"] = "1"
-    Repository.clone_from(url=url, path=path, recursive=True)
+    def get_remote_file():
+        repo_path = tmpdir.mkdir("remote_repo")
+        os.environ["GIT_LFS_SKIP_SMUDGE"] = "1"
+        Repository.clone_from(url=url, path=repo_path, recursive=True)

-    with project_context.with_path(path):
-        with chdir(project_context.path):
+        with chdir(repo_path):
            runner.invoke(cli, ["migrate", "--strict"])

-        file = load_dataset_with_injection("testing-create-04", project_context.repository).find_file(
-            "data/testing-create-04/ie_data_with_TRCAPE.xls"
-        )
+        with project_context.with_path(repo_path):
+            dataset = get_dataset_with_injection("testing-create-04")
+            return dataset.find_file("data/testing-create-04/ie_data_with_TRCAPE.xls")

    result = runner.invoke(
        cli,
@@ -1474,7 +1474,8 @@ def test_import_from_renku_project(tmpdir, client, runner, load_dataset_with_inj
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

    path = "data/remote-dataset/new-directory/ie_data_with_TRCAPE.xls"
-    metadata = load_dataset_with_injection("remote-dataset", client).find_file(path)
+    metadata = get_dataset_with_injection("remote-dataset").find_file(path)
+    file = get_remote_file()
    assert metadata.based_on.checksum == file.entity.checksum
    assert metadata.based_on.path == file.entity.path
    assert metadata.based_on.url == url
@@ -1484,7 +1485,7 @@
@pytest.mark.parametrize("ref", ["v0.3.0", "fe6ec65cc84bcf01e879ef38c0793208f7fab4bb"])
@retry_failed
@pytest.mark.vcr
-def test_add_specific_refs(ref, runner, client):
+def test_add_specific_refs(ref, runner, project):
    """Test adding a specific version of files."""
    filename = "CHANGES.rst"

@@ -1507,7 +1508,7 @@ def test_add_specific_refs(ref, runner, client):
        ],
    )
    assert 0 == result.exit_code, format_result_exception(result)
-    content = (project_context.path / DATA_DIR / "dataset" / filename).read_text()
+    content = (project.path / DATA_DIR / "dataset" / filename).read_text()
    assert "v0.3.0" in content
    assert "v0.3.1" not in content

@@ -1516,7 +1517,7 @@
@pytest.mark.parametrize("ref", ["v0.3.1", "27e29abd409c83129a3fdb8b8b0b898b23bcb229"])
@retry_failed
@pytest.mark.vcr
-def test_update_specific_refs(ref, runner, client):
+def test_update_specific_refs(ref, runner, project):
    """Test updating to a specific version of files."""
    filename = "CHANGES.rst"
    # create a dataset
@@ -1539,30 +1540,30 @@
    )
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)
    file = Path(DATA_DIR) / "dataset" / filename
-    content = (project_context.path / file).read_text()
+    content = (project.path / file).read_text()
    assert "v0.3.1" not in content

-    commit_sha_after_file1_delete = project_context.repository.head.commit.hexsha
+    commit_sha_after_file1_delete = project.repository.head.commit.hexsha

    result = runner.invoke(cli, ["dataset", "update", "--ref", ref, "--all", "--dry-run"])

    assert 1 == result.exit_code, format_result_exception(result)
    assert "The following files will be updated" in result.output
    assert str(file) in result.output
-    assert 
commit_sha_after_file1_delete == project_context.repository.head.commit.hexsha
-    assert not project_context.repository.is_dirty(untracked_files=True)
+    assert commit_sha_after_file1_delete == project.repository.head.commit.hexsha
+    assert not project.repository.is_dirty(untracked_files=True)

    # update data to a later version
    result = runner.invoke(cli, ["dataset", "update", "--ref", ref, "--all"])
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    content = (project_context.path / DATA_DIR / "dataset" / filename).read_text()
+    content = (project.path / DATA_DIR / "dataset" / filename).read_text()
    assert "v0.3.1" in content
    assert "v0.3.2" not in content


@pytest.mark.integration
@retry_failed
-def test_update_with_multiple_remotes_and_ref(runner, client):
+def test_update_with_multiple_remotes_and_ref(runner, project):
    """Test updating fails when ref is ambiguous."""
    # create a dataset
    result = runner.invoke(cli, ["dataset", "create", "dataset"])
@@ -1605,7 +1606,7 @@ def test_update_with_multiple_remotes_and_ref(runner, client):

@pytest.mark.integration
@retry_failed
-def test_files_are_tracked_in_lfs(runner, client, no_lfs_size_limit):
+def test_files_are_tracked_in_lfs(runner, project, no_lfs_size_limit):
    """Test files added from a Git repo are tracked in Git LFS."""
    filename = "poetry.lock"
    # create a dataset
@@ -1636,14 +1637,14 @@ def test_files_are_tracked_in_lfs(runner, client, no_lfs_size_limit):
    "url",
    ["https://username:password@raw.githubusercontent.com/SwissDataScienceCenter/renku-python/master/docs/Makefile"],
)
-def test_add_removes_credentials(runner, client, url, load_dataset_with_injection):
+def test_add_removes_credentials(runner, project, url):
    """Check removal of credentials during adding of remote data files."""
    from urllib.parse import urlparse

    result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-dataset", url])
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    dataset = load_dataset_with_injection("my-dataset", client)
+    dataset = get_dataset_with_injection("my-dataset")
    file = dataset.files[0]
    url_obj = urlparse(url)
    assert file.source == url_obj._replace(netloc=url_obj.hostname).geturl()
@@ -1661,7 +1662,7 @@
        ("attachment;filename=\"EURO rates.csv\";filename*=utf-8''%e2%82%ac%20rates.csv", "€ rates.csv"),
    ],
)
-def test_add_with_content_disposition(runner, client, monkeypatch, disposition, filename, load_dataset_with_injection):
+def test_add_with_content_disposition(runner, project, monkeypatch, disposition, filename):
    """Check filename is read from content disposition."""
    import renku.core.util.requests

@@ -1679,7 +1680,7 @@ def _fake_disposition(response):
    result = runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-dataset", url])
    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    dataset = load_dataset_with_injection("my-dataset", client)
+    dataset = get_dataset_with_injection("my-dataset")
    file = dataset.files[0]
    assert Path(file.entity.path).name == filename

@@ -1689,7 +1690,7 @@
@pytest.mark.parametrize(
    "url", ["https://raw.githubusercontent.com/SwissDataScienceCenter/renku-python/master/docs/Makefile"]
)
-def test_check_disk_space(runner, client, monkeypatch, url):
+def test_check_disk_space(runner, project, monkeypatch, url):
    """Check adding to dataset prompts if disk space is not enough."""

    def 
disk_usage(_): @@ -1710,20 +1711,22 @@ def disk_usage(_): @pytest.mark.migration @pytest.mark.integration @retry_failed -def test_migration_submodule_datasets(isolated_runner, old_repository_with_submodules, load_dataset_with_injection): +def test_migration_submodule_datasets(isolated_runner, old_repository_with_submodules): """Test migration of datasets that use submodules.""" + from renku.domain_model.project_context import project_context + project_path = old_repository_with_submodules.path os.chdir(project_path) - assert ["remote-renku-project"] == [s.name for s in old_repository_with_submodules.submodules] + assert ["remote-renku-project"] == [s.name for s in old_repository_with_submodules.repository.submodules] result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) - assert [] == list(old_repository_with_submodules.submodules) + assert [] == list(old_repository_with_submodules.repository.submodules) with project_context.with_path(project_path): - dataset = load_dataset_with_injection("remote", project_context.repository) + dataset = get_dataset_with_injection("remote") for file in dataset.files: path = Path(file.entity.path) @@ -1753,11 +1756,11 @@ def test_dataset_add_dropbox(runner, project, url, size): @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_immutability_at_import(runner, client, load_dataset_with_injection): +def test_immutability_at_import(runner, project): """Test first dataset's ID after import is the same as its initial identifier.""" assert 0 == runner.invoke(cli, ["dataset", "import", "-y", "--name", "my-dataset", "10.7910/DVN/F4NUMR"]).exit_code - dataset = load_dataset_with_injection("my-dataset", client) + dataset = get_dataset_with_injection("my-dataset") assert dataset.initial_identifier == dataset.identifier @@ -1765,35 +1768,35 @@ def test_immutability_at_import(runner, client, load_dataset_with_injection): @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_immutability_after_import(runner, client, load_dataset_with_injection): +def test_immutability_after_import(runner, project): """Test first dataset's ID after import is the same as metadata directory.""" assert 0 == runner.invoke(cli, ["dataset", "import", "-y", "--name", "my-dataset", "10.7910/DVN/F4NUMR"]).exit_code - old_dataset = load_dataset_with_injection("my-dataset", client) + old_dataset = get_dataset_with_injection("my-dataset") # Make some modification in dataset assert 0 == runner.invoke(cli, ["dataset", "edit", "my-dataset", "-k", "new-data"]).exit_code - dataset = load_dataset_with_injection("my-dataset", client) - mutator = get_git_user(project_context.repository) + dataset = get_dataset_with_injection("my-dataset") + mutator = get_git_user(project.repository) assert_dataset_is_mutated(old=old_dataset, new=dataset, mutator=mutator) @pytest.mark.integration @retry_failed -def test_immutability_after_update(client, runner, load_dataset_with_injection): +def test_immutability_after_update(project, runner): """Test dataset is mutated after an update.""" url = "https://github.com/SwissDataScienceCenter/renku-jupyter.git" result = runner.invoke(cli, ["dataset", "add", "--create", "my-data", "--ref", "0.3.0", "-s", "README.md", url]) assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - old_dataset = load_dataset_with_injection("my-data", client) + old_dataset = get_dataset_with_injection("my-data") assert 0 == runner.invoke(cli, ["dataset", "update", "--all"], 
catch_exceptions=False).exit_code - dataset = load_dataset_with_injection("my-data", client) - mutator = get_git_user(project_context.repository) + dataset = get_dataset_with_injection("my-data") + mutator = get_git_user(project.repository) assert_dataset_is_mutated(old=old_dataset, new=dataset, mutator=mutator) @@ -1808,11 +1811,11 @@ def test_immutability_after_update(client, runner, load_dataset_with_injection): ], ) @retry_failed -def test_import_returns_last_dataset_version(runner, client, url, load_dataset_with_injection): +def test_import_returns_last_dataset_version(runner, project, url): """Test importing with any identifier returns the last version of dataset.""" assert 0 == runner.invoke(cli, ["dataset", "import", "-y", "--name", "my-dataset", url]).exit_code - dataset = load_dataset_with_injection("my-dataset", client) + dataset = get_dataset_with_injection("my-dataset") initial_identifier = "9dde49ee031a4568b193a58892e26534" latest_identifier = "0dc3a120e4af4a4ca88870d1719c4631" @@ -1823,17 +1826,17 @@ def test_import_returns_last_dataset_version(runner, client, url, load_dataset_w @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_datasets_provenance_after_import(runner, client, get_datasets_provenance_with_injection): +def test_datasets_provenance_after_import(runner, project): """Test dataset provenance is updated after importing a dataset.""" assert 0 == runner.invoke(cli, ["dataset", "import", "-y", "--name", "my-data", "10.7910/DVN/F4NUMR"]).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: assert datasets_provenance.get_by_name("my-data") is not None @pytest.mark.integration @retry_failed -def test_datasets_provenance_after_git_update(client, runner, get_datasets_provenance_with_injection): +def test_datasets_provenance_after_git_update(project, runner): """Test dataset provenance is updated after an update.""" url = "https://github.com/SwissDataScienceCenter/renku-jupyter.git" @@ -1842,7 +1845,7 @@ def test_datasets_provenance_after_git_update(client, runner, get_datasets_prove assert 0 == runner.invoke(cli, ["dataset", "update", "--all"], catch_exceptions=False).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: current_version = datasets_provenance.get_by_name("my-data") assert current_version.identifier != current_version.initial_identifier @@ -1850,14 +1853,14 @@ def test_datasets_provenance_after_git_update(client, runner, get_datasets_prove @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_datasets_provenance_after_external_provider_update(client, runner, get_datasets_provenance_with_injection): +def test_datasets_provenance_after_external_provider_update(project, runner): """Test dataset provenance is not updated after an update from an external provider.""" doi = "10.5281/zenodo.2658634" assert 0 == runner.invoke(cli, ["dataset", "import", "-y", "--name", "my-data", doi]).exit_code assert 0 == runner.invoke(cli, ["dataset", "update", "my-data"]).exit_code - with get_datasets_provenance_with_injection(client) as datasets_provenance: + with get_datasets_provenance_with_injection() as datasets_provenance: current_version = datasets_provenance.get_by_name("my-data") assert current_version.identifier != current_version.initial_identifier @@ -1866,23 +1869,23 @@ def 
test_datasets_provenance_after_external_provider_update(client, runner, get_
@pytest.mark.integration
@retry_failed
@pytest.mark.vcr
-def test_datasets_import_with_tag(client, runner, get_datasets_provenance_with_injection):
+def test_datasets_import_with_tag(project, runner):
    """Test dataset import from a Renku provider with a specified tag version."""
    doi = "https://dev.renku.ch/datasets/ddafee6bb38a46f99346cb563afc2c64"

    result = runner.invoke(cli, ["dataset", "import", "-y", "--tag", "v1", doi])

    assert 0 == result.exit_code, format_result_exception(result)

-    with get_datasets_provenance_with_injection(client) as datasets_provenance:
+    with get_datasets_provenance_with_injection() as datasets_provenance:
        dataset = datasets_provenance.get_by_name("parts")

-    dataset_path = project_context.path / "data" / "parts"
+    dataset_path = project.path / "data" / "parts"

    assert "v1" == dataset.version
    assert (dataset_path / "README.md").exists()  # This file was deleted in a later version
    assert doi == dataset.same_as.value
    assert "Updated on 01.06.2022" not in (dataset_path / "parts.csv").read_text()

-    git_attributes = (project_context.repository.path / ".gitattributes").read_text()
+    git_attributes = (project.repository.path / ".gitattributes").read_text()
    assert "data/parts/parts.csv" in git_attributes
    assert "data/parts/part_relationships.csv" in git_attributes

@@ -1895,16 +1898,16 @@ def test_datasets_import_with_tag(client, runner, get_datasets_provenance_with_i
@pytest.mark.integration
@retry_failed
@pytest.mark.vcr
-def test_datasets_imported_with_tag_are_not_updated(client, runner):
+def test_datasets_imported_with_tag_are_not_updated(project, runner):
    """Test datasets that are imported with a specified tag version won't be updated."""
    doi = "https://dev.renku.ch/datasets/ddafee6bb38a46f99346cb563afc2c64"

    assert 0 == runner.invoke(cli, ["dataset", "import", "-y", "--tag", "v1", doi]).exit_code

-    commit_sha_before = project_context.repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha

    result = runner.invoke(cli, ["dataset", "update", "--all"])

-    commit_sha_after = project_context.repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha

    assert 0 == result.exit_code, format_result_exception(result)
    assert "Skipped updating imported Renku dataset 'parts' with tag 'v1'" in result.output
@@ -1914,22 +1917,20 @@
@pytest.mark.integration
@retry_failed
@pytest.mark.vcr
-def test_dataset_update_removes_deleted_files(
-    client, runner, client_database_injection_manager, get_datasets_provenance_with_injection
-):
+def test_dataset_update_removes_deleted_files(project, runner, with_injection):
    """Test dataset update removes deleted files in the updated renku datasets."""
    doi = "https://dev.renku.ch/datasets/ddafee6bb38a46f99346cb563afc2c64"

    assert 0 == runner.invoke(cli, ["dataset", "import", "-y", "--tag", "v1", "--name", "parts", doi]).exit_code

    # NOTE: Allow the dataset to be updated by removing ``version`` and setting ``same_as`` to another id of the dataset
-    with client_database_injection_manager(client):
-        with with_dataset(client, name="parts", commit_database=True) as dataset:
+    with with_injection():
+        with with_dataset(name="parts", commit_database=True) as dataset:
            dataset.version = None
            dataset.same_as = Url(url_id="https://dev.renku.ch/datasets/abc934939cbf45dca0cfef61d05fa132")

-    project_context.repository.add(all=True)
-    
project_context.repository.commit("metadata updated")
+    project.repository.add(all=True)
+    project.repository.commit("metadata updated")

-    with get_datasets_provenance_with_injection(client) as datasets_provenance:
+    with get_datasets_provenance_with_injection() as datasets_provenance:
        dataset = datasets_provenance.get_by_name("parts")

    assert 4 == len(dataset.files)
@@ -1937,7 +1938,7 @@ def test_dataset_update_removes_deleted_files(
    result = runner.invoke(cli, ["dataset", "update", "parts"])
    assert 0 == result.exit_code, format_result_exception(result)

-    with get_datasets_provenance_with_injection(client) as datasets_provenance:
+    with get_datasets_provenance_with_injection() as datasets_provenance:
        dataset = datasets_provenance.get_by_name("parts")

    assert 2 == len(dataset.files)

@@ -1969,25 +1970,25 @@ def test_dataset_ls_with_tag(runner, tmp_path):
@pytest.mark.integration
@retry_failed
@pytest.mark.vcr
-def test_create_with_s3_backend(runner, client, load_dataset_with_injection):
+def test_create_with_s3_backend(runner, project):
    """Test creating a dataset with a valid S3 backend storage."""
    result = runner.invoke(cli, ["dataset", "create", "s3-data", "--storage", "s3://giab/"], input="\n\n\n")

    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    dataset = load_dataset_with_injection("s3-data", client)
+    dataset = get_dataset_with_injection("s3-data")

    assert "s3://giab/" == dataset.storage

    # NOTE: Dataset's data dir is git-ignored
    dataset_datadir = os.path.join(DATA_DIR, "s3-data")
-    assert {dataset_datadir} == set(project_context.repository.get_ignored_paths(dataset_datadir))
+    assert {dataset_datadir} == set(project.repository.get_ignored_paths(dataset_datadir))


@pytest.mark.integration
@retry_failed
@pytest.mark.vcr
-def test_create_with_non_existing_s3_backend(runner, client, load_dataset_with_injection):
+def test_create_with_non_existing_s3_backend(runner, project):
    """Test creating a dataset with an invalid S3 backend storage."""
    result = runner.invoke(cli, ["dataset", "create", "s3-data", "--storage", "s3://no-giab/"], input="\n\n\n")

@@ -1998,7 +1999,7 @@
@pytest.mark.integration
@retry_failed
@pytest.mark.vcr
-def test_create_with_unauthorized_s3_backend(runner, client, load_dataset_with_injection):
+def test_create_with_unauthorized_s3_backend(runner, project):
    """Test creating a dataset with invalid credentials."""
    result = runner.invoke(cli, ["dataset", "create", "s3-data", "--storage", "s3://amazon/"], input="\n\n\n")

@@ -2009,7 +2010,7 @@
@pytest.mark.integration
@retry_failed
@pytest.mark.vcr
-def test_pull_data_from_s3_backend(runner, client, load_dataset_with_injection):
+def test_pull_data_from_s3_backend(runner, project):
    """Test pulling data for a dataset with an S3 backend."""
    result = runner.invoke(cli, ["dataset", "create", "s3-data", "--storage", "s3://giab/"], input="\n\n\n")

@@ -2032,23 +2033,23 @@

    assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

-    dataset = load_dataset_with_injection("s3-data", client)
+    dataset = get_dataset_with_injection("s3-data")

    file = next(f for f in dataset.files if f.entity.path.endswith("Aspera_download_from_ftp.README"))
-    assert (project_context.path / file.entity.path).exists()
-    assert not 
(project_context.path / file.entity.path).is_symlink() + assert (project.path / file.entity.path).exists() + assert not (project.path / file.entity.path).is_symlink() file = next(f for f in dataset.files if f.entity.path.endswith("02structural.bed.gz")) - assert (project_context.path / file.entity.path).exists() - assert not (project_context.path / file.entity.path).is_symlink() + assert (project.path / file.entity.path).exists() + assert not (project.path / file.entity.path).is_symlink() assert "0ddc10ab9f9f0dd0fea4d66d9a55ba99" == file.based_on.checksum @pytest.mark.integration @retry_failed @pytest.mark.vcr -def test_pull_data_from_s3_backend_to_a_location(runner, client, load_dataset_with_injection, tmp_path): +def test_pull_data_from_s3_backend_to_a_location(runner, project, tmp_path): """Test pulling data for a dataset with an S3 backend to a location other than dataset's data directory.""" result = runner.invoke(cli, ["dataset", "create", "s3-data", "--storage", "s3://giab/"], input="\n\n\n") @@ -2073,19 +2074,19 @@ def test_pull_data_from_s3_backend_to_a_location(runner, client, load_dataset_wi assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - dataset = load_dataset_with_injection("s3-data", client) + dataset = get_dataset_with_injection("s3-data") file = next(f for f in dataset.files if f.entity.path.endswith("Aspera_download_from_ftp.README")) - assert (project_context.path / file.entity.path).is_symlink() - assert (location / file.entity.path).resolve() == (project_context.path / file.entity.path).resolve() + assert (project.path / file.entity.path).is_symlink() + assert (location / file.entity.path).resolve() == (project.path / file.entity.path).resolve() file = next(f for f in dataset.files if f.entity.path.endswith("02structural.bed.gz")) - assert (project_context.path / file.entity.path).is_symlink() - assert (location / file.entity.path).resolve() == (project_context.path / file.entity.path).resolve() + assert (project.path / file.entity.path).is_symlink() + assert (location / file.entity.path).resolve() == (project.path / file.entity.path).resolve() assert "0ddc10ab9f9f0dd0fea4d66d9a55ba99" == file.based_on.checksum - assert str(location) in (project_context.path / ".renku" / "renku.ini").read_text() + assert str(location) in (project.path / ".renku" / "renku.ini").read_text() @pytest.mark.integration @@ -2098,7 +2099,7 @@ def test_pull_data_from_s3_backend_to_a_location(runner, client, load_dataset_wi ([], ["s3://giab/tools", "s3://giab/changelog_details"], "s3://giab"), ], ) -def test_adding_data_from_s3(runner, client, create_s3_dataset, mocker, args, uris, storage_uri): +def test_adding_data_from_s3(runner, project, create_s3_dataset, mocker, args, uris, storage_uri): """Ensure metadata from a bucket can be added.""" mock_s3_storage = mocker.patch("renku.infrastructure.storage.s3.S3Storage", autospec=True) instance_s3_storage = mock_s3_storage.return_value @@ -2148,7 +2149,7 @@ def test_adding_data_from_s3(runner, client, create_s3_dataset, mocker, args, ur ), ], ) -def test_invalid_s3_args(runner, client, create_s3_dataset, cmd_args, expected_error_msg, mocker): +def test_invalid_s3_args(runner, project, create_s3_dataset, cmd_args, expected_error_msg, mocker): """Test invalid arguments for adding data to S3 dataset.""" mock_s3_storage = mocker.patch("renku.infrastructure.storage.s3.S3Storage", autospec=True) storage_uri = "s3://giab" @@ -2174,7 +2175,7 @@ def test_invalid_s3_args(runner, client, create_s3_dataset, cmd_args, 
expected_e ("s3://giab/1/2/3", "s3://giab/1/3/2"), ], ) -def test_adding_s3_data_outside_sub_path_not_allowed(runner, client, create_s3_dataset, mocker, storage_uri, add_uri): +def test_adding_s3_data_outside_sub_path_not_allowed(runner, project, create_s3_dataset, mocker, storage_uri, add_uri): """Ensure that data from bucket that does not match storage bucket name or path cannot be added.""" mocker.patch("renku.infrastructure.storage.s3.S3Storage", autospec=True) dataset_name = "test-s3-dataset" @@ -2187,17 +2188,17 @@ def test_adding_s3_data_outside_sub_path_not_allowed(runner, client, create_s3_d @pytest.mark.integration @retry_failed -def test_mount_unmount_data_from_s3_backend(runner, client): +def test_mount_unmount_data_from_s3_backend(runner, project): """Test mounting/unmounting data for a dataset with an S3 backend.""" result = runner.invoke(cli, ["dataset", "create", "s3-data", "--storage", "s3://giab/"], input="\n\n\n") assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - s3_data = project_context.path / "data" / "s3-data" / "Aspera_download_from_ftp.README" + s3_data = project.path / "data" / "s3-data" / "Aspera_download_from_ftp.README" assert not s3_data.exists() # NOTE: Create some dummy files - dummy = project_context.path / "data" / "s3-data" / "dummy" + dummy = project.path / "data" / "s3-data" / "dummy" dummy.parent.mkdir(exist_ok=True, parents=True) dummy.touch() @@ -2217,17 +2218,17 @@ def test_mount_unmount_data_from_s3_backend(runner, client): @pytest.mark.integration @retry_failed -def test_mount_data_from_an_existing_mount_point(runner, client, tmp_path): +def test_mount_data_from_an_existing_mount_point(runner, project, tmp_path): """Test get data for a dataset with an S3 backend from an existing mount-point.""" result = runner.invoke(cli, ["dataset", "create", "s3-data", "--storage", "s3://giab/"], input="\n\n\n") assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes) - s3_data = project_context.path / "data" / "s3-data" / "Aspera_download_from_ftp.README" + s3_data = project.path / "data" / "s3-data" / "Aspera_download_from_ftp.README" assert not s3_data.exists() # NOTE: Create some dummy files - dummy = project_context.path / "data" / "s3-data" / "dummy" + dummy = project.path / "data" / "s3-data" / "dummy" dummy.parent.mkdir(exist_ok=True, parents=True) dummy.touch() diff --git a/tests/cli/test_isolation.py b/tests/cli/test_isolation.py index 432db4a8e2..61f4ddd620 100644 --- a/tests/cli/test_isolation.py +++ b/tests/cli/test_isolation.py @@ -26,32 +26,31 @@ from tests.utils import write_and_commit_file -def test_run_in_isolation(runner, repository, run, subdirectory): +def test_run_in_isolation(runner, project, run, subdirectory): """Test run in isolation.""" - cwd = repository.path - write_and_commit_file(repository, path=cwd / ".gitignore", content="\nlock") + write_and_commit_file(project.repository, path=project.path / ".gitignore", content="\nlock") prefix = ["run", "--no-output"] cmd = ["python", "-S", "-c", 'import os, sys; sys.exit(1 if os.path.exists("lock") else 0)'] - head = repository.head.commit.hexsha + head = project.repository.head.commit.hexsha with Lock("lock"): assert 1 == run(args=prefix + cmd) - assert repository.head.commit.hexsha == head + assert project.repository.head.commit.hexsha == head assert 0 == run(prefix + ["--isolation"] + cmd) - assert repository.head.commit.hexsha != head + assert project.repository.head.commit.hexsha != head -def 
test_file_modification_during_run(tmp_path, runner, repository, subdirectory, no_lfs_size_limit, no_lfs_warning):
+def test_file_modification_during_run(tmp_path, runner, project, subdirectory, no_lfs_size_limit, no_lfs_warning):
    """Test file modification during an isolated run."""
-    script = repository.path / "script.py"
-    output = repository.path / "output"
+    script = project.path / "script.py"
+    output = project.path / "output"
    lock_file = tmp_path / "lock"

    write_and_commit_file(
-        repository,
+        project.repository,
        script,
        textwrap.dedent(
            f"""
@@ -84,11 +83,11 @@ def test_file_modification_during_run(tmp_path, runner, repository, subdirectory
    assert 0 == process.wait()

    # NOTE: ``script.py`` is modified in the current worktree
-    assert {"script.py"} == {c.a_path for c in repository.unstaged_changes}
+    assert {"script.py"} == {c.a_path for c in project.repository.unstaged_changes}

    # NOTE: Isolated run finished with the expected result
    assert "test" == output.read_text().strip()

    # NOTE: Isolated run committed its results
-    committed_changed_files_in_run = {c.a_path for c in repository.head.commit.get_changes()}
+    committed_changed_files_in_run = {c.a_path for c in project.repository.head.commit.get_changes()}
    assert "output" in committed_changed_files_in_run
    assert "script.py" not in committed_changed_files_in_run
diff --git a/tests/cli/test_log.py b/tests/cli/test_log.py
index 90cccba421..5e350e0bc7 100644
--- a/tests/cli/test_log.py
+++ b/tests/cli/test_log.py
@@ -17,7 +17,6 @@
# limitations under the License.
"""Test ``log`` command."""

-from renku.domain_model.project_context import project_context
from renku.ui.cli import cli
from tests.utils import format_result_exception

@@ -53,35 +52,35 @@ def test_activity_log(runner, project):
    assert "output-2: bar" in result.output


-def test_dataset_log(runner, project, client):
+def test_dataset_log(runner, project):
    """Test renku log for dataset."""
-    result = runner.invoke(cli, ["dataset", "create", "testset"])
+    result = runner.invoke(cli, ["dataset", "create", "test-set"])
    assert 0 == result.exit_code, format_result_exception(result)

-    with (project_context.path / "my_file").open("w") as fp:
+    with (project.path / "my_file").open("w") as fp:
        fp.write("dataset file")

-    result = runner.invoke(cli, ["dataset", "add", "--copy", "testset", "my_file"])
+    result = runner.invoke(cli, ["dataset", "add", "--copy", "test-set", "my_file"])
    assert 0 == result.exit_code, format_result_exception(result)

    result = runner.invoke(
-        cli, ["dataset", "edit", "testset", "-t", "new title", "-d", "new description", "-k", "a", "-k", "b"]
+        cli, ["dataset", "edit", "test-set", "-t", "new title", "-d", "new description", "-k", "a", "-k", "b"]
    )
    assert 0 == result.exit_code, format_result_exception(result)

-    result = runner.invoke(cli, ["dataset", "unlink", "testset", "--include", "my_file"], input="y")
+    result = runner.invoke(cli, ["dataset", "unlink", "test-set", "--include", "my_file"], input="y")
    assert 0 == result.exit_code, format_result_exception(result)

-    result = runner.invoke(cli, ["dataset", "rm", "testset"], input="y")
+    result = runner.invoke(cli, ["dataset", "rm", "test-set"], input="y")
    assert 0 == result.exit_code, format_result_exception(result)

    result = runner.invoke(cli, ["log"])
    assert 0 == result.exit_code, format_result_exception(result)

-    assert "Dataset testset" in result.output
+    assert "Dataset test-set" in result.output
    assert "Changes: created" in result.output
    assert "Changes: modified" in result.output
    assert "Changes: deleted" in result.output
    assert "Files 
modified" in result.output - assert "- data/testset/my_file" in result.output - assert "+ data/testset/my_file" in result.output + assert "- data/test-set/my_file" in result.output + assert "+ data/test-set/my_file" in result.output assert "Title set to: new title" in result.output assert "Description set to: new description" in result.output assert "Keywords modified" in result.output diff --git a/tests/cli/test_login.py b/tests/cli/test_login.py index 706a2342dd..aadd6e18b1 100644 --- a/tests/cli/test_login.py +++ b/tests/cli/test_login.py @@ -27,27 +27,27 @@ from tests.utils import format_result_exception -def test_login(runner, project_with_remote, mock_login, client_database_injection_manager): +def test_login(runner, project_with_remote, mock_login, with_injection): """Test login command.""" remote_url = f"https://{ENDPOINT}/gitlab/namespace/project" - project_with_remote.remotes[0].set_url(remote_url) + project_with_remote.repository.remotes[0].set_url(remote_url) result = runner.invoke(cli, ["login", "--git", ENDPOINT], input="y") assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(project_with_remote): + with with_injection(): assert ACCESS_TOKEN == read_renku_token(ENDPOINT) assert ACCESS_TOKEN == read_renku_token("", get_endpoint_from_remote=True) - credential = project_with_remote.get_configuration().get_value("credential", "helper") + credential = project_with_remote.repository.get_configuration().get_value("credential", "helper") assert f"!renku credentials --hostname {ENDPOINT}" == credential - assert {"origin", "renku-backup-origin"} == {r.name for r in project_with_remote.remotes} - assert remote_url == project_with_remote.remotes["renku-backup-origin"].url - assert project_with_remote.remotes["origin"].url.startswith(f"https://{ENDPOINT}/repo") + assert {"origin", "renku-backup-origin"} == {r.name for r in project_with_remote.repository.remotes} + assert remote_url == project_with_remote.repository.remotes["renku-backup-origin"].url + assert project_with_remote.repository.remotes["origin"].url.startswith(f"https://{ENDPOINT}/repo") @pytest.mark.parametrize("args", [[], ["--git"]]) -def test_login_no_endpoint(runner, client, mock_login, args): +def test_login_no_endpoint(runner, project, mock_login, args): """Test login command with no endpoint.""" result = runner.invoke(cli, ["login"] + args) @@ -65,7 +65,7 @@ def test_login_no_endpoint_and_remote(runner, project_with_remote, mock_login, a @pytest.mark.parametrize("args", [[], ["--git"]]) -def test_login_invalid_endpoint(runner, client, mock_login, args): +def test_login_invalid_endpoint(runner, project, mock_login, args): """Test login with and invalid endpoint.""" result = runner.invoke(cli, ["login", "http: //example.com"] + args) @@ -73,7 +73,7 @@ def test_login_invalid_endpoint(runner, client, mock_login, args): assert "Invalid endpoint: `http: //example.com`." in result.output -def test_login_with_config_endpoint(runner, client, mock_login): +def test_login_with_config_endpoint(runner, project, mock_login): """Test login command with endpoint in config file.""" assert 0 == runner.invoke(cli, ["config", "set", "endpoint", ENDPOINT]).exit_code @@ -83,7 +83,7 @@ def test_login_with_config_endpoint(runner, client, mock_login): assert "Successfully logged in." 
in result.output -def test_logout(runner, client, mock_login, client_database_injection_manager): +def test_logout(runner, project, mock_login, with_injection): """Test logout removes all credentials.""" assert 0 == runner.invoke(cli, ["login", ENDPOINT]).exit_code @@ -91,22 +91,22 @@ def test_logout(runner, client, mock_login, client_database_injection_manager): assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): assert read_renku_token(ENDPOINT) is None assert "Successfully logged out." in result.output -def test_repeated_login(runner, client, mock_login, client_database_injection_manager): +def test_repeated_login(runner, project, mock_login, with_injection): """Test multiple logins.""" assert 0 == runner.invoke(cli, ["login", ENDPOINT]).exit_code assert 0 == runner.invoke(cli, ["login", ENDPOINT]).exit_code - with client_database_injection_manager(client): + with with_injection(): assert ACCESS_TOKEN == read_renku_token(ENDPOINT) -def test_repeated_logout(runner, client, mock_login, client_database_injection_manager): +def test_repeated_logout(runner, project, mock_login, with_injection): """Test multiple logouts.""" assert 0 == runner.invoke(cli, ["login", ENDPOINT]).exit_code @@ -114,11 +114,11 @@ def test_repeated_logout(runner, client, mock_login, client_database_injection_m assert 0 == runner.invoke(cli, ["logout"]).exit_code - with client_database_injection_manager(client): + with with_injection(): assert read_renku_token(ENDPOINT) is None -def test_login_to_multiple_endpoints(runner, client, mock_login, client_database_injection_manager): +def test_login_to_multiple_endpoints(runner, project, mock_login, with_injection): """Test login to multiple endpoints.""" second_endpoint, second_token = "second.endpoint", "second-token" mock_login.add_device_auth(second_endpoint, second_token) @@ -126,12 +126,12 @@ def test_login_to_multiple_endpoints(runner, client, mock_login, client_database assert 0 == runner.invoke(cli, ["login", second_endpoint]).exit_code - with client_database_injection_manager(client): + with with_injection(): assert ACCESS_TOKEN == read_renku_token(ENDPOINT) assert second_token == read_renku_token(second_endpoint) -def test_logout_all(runner, client, mock_login, client_database_injection_manager): +def test_logout_all(runner, project, mock_login, with_injection): """Test logout with no endpoint removes multiple credentials.""" second_endpoint, second_token = "second.endpoint", "second-token" mock_login.add_device_auth(second_endpoint, second_token) @@ -140,12 +140,12 @@ def test_logout_all(runner, client, mock_login, client_database_injection_manage assert 0 == runner.invoke(cli, ["logout"]).exit_code - with client_database_injection_manager(client): + with with_injection(): assert read_renku_token(ENDPOINT) is None assert read_renku_token(second_endpoint) is None -def test_logout_one_endpoint(runner, client, mock_login, client_database_injection_manager): +def test_logout_one_endpoint(runner, project, mock_login, with_injection): """Test logout from an endpoint removes credentials for that endpoint only.""" second_endpoint, second_token = "second.endpoint", "second-token" mock_login.add_device_auth(second_endpoint, second_token) @@ -154,18 +154,18 @@ def test_logout_one_endpoint(runner, client, mock_login, client_database_injecti assert 0 == runner.invoke(cli, ["logout", ENDPOINT]).exit_code - with client_database_injection_manager(client): + with with_injection(): assert 
read_renku_token(ENDPOINT) is None
        assert second_token == read_renku_token(second_endpoint)


-def test_logout_non_existing_endpoint(runner, client, mock_login, client_database_injection_manager):
+def test_logout_non_existing_endpoint(runner, project, mock_login, with_injection):
    """Test logout from a non-existing endpoint does nothing."""
    assert 0 == runner.invoke(cli, ["login", ENDPOINT]).exit_code

    assert 0 == runner.invoke(cli, ["logout", "non.existing"]).exit_code

-    with client_database_injection_manager(client):
+    with with_injection():
        assert read_renku_token(ENDPOINT) is not None

@@ -178,7 +178,7 @@ def test_login_git_abort(runner, project_with_remote):
    assert "Aborted!" in result.output


-def test_login_non_git(runner, client, directory_tree):
+def test_login_non_git(runner, project, directory_tree):
    """Test login inside a non-git directory."""
    with chdir(directory_tree):
        result = runner.invoke(cli, ["login", "--git", ENDPOINT])

@@ -187,7 +187,7 @@ def test_login_non_git(runner, client, directory_tree):
    assert "Cannot use '--git' flag outside a project" in result.output


-def test_logout_non_git(runner, client, directory_tree):
+def test_logout_non_git(runner, project, directory_tree):
    """Test logout inside a non-git directory."""
    with chdir(directory_tree):
        result = runner.invoke(cli, ["logout"])

@@ -198,9 +198,9 @@ def test_logout_non_git(runner, client, directory_tree):

def test_login_git_no_unique_remote(runner, project_with_remote):
    """Test login from a git directory with no clear remote."""
-    project_with_remote.remotes.add("second-remote", "second-remote.net")
-    project_with_remote.branches.add("branch-with-no-remote")
-    project_with_remote.checkout("branch-with-no-remote")
+    project_with_remote.repository.remotes.add("second-remote", "second-remote.net")
+    project_with_remote.repository.branches.add("branch-with-no-remote")
+    project_with_remote.repository.checkout("branch-with-no-remote")

    result = runner.invoke(cli, ["login", "--git", ENDPOINT])

@@ -210,7 +210,7 @@ def test_login_git_no_unique_remote(runner, project_with_remote):

def test_repeated_git_login(runner, project_with_remote, mock_login):
    """Test that multiple logins to a git repo fail to change the remote URL after the first time."""
-    remote_url = project_with_remote.remotes[0].url
+    remote_url = project_with_remote.repository.remotes[0].url

    assert 0 == runner.invoke(cli, ["login", "--git", "--yes", ENDPOINT]).exit_code

@@ -219,25 +219,27 @@ def test_repeated_git_login(runner, project_with_remote, mock_login):

    assert 0 == result.exit_code, format_result_exception(result)
    assert "Backup remote 'renku-backup-origin' already exists. Ignoring '--git' flag." 
in result.output assert "Error: Cannot create backup remote 'renku-backup-origin' for" not in result.output - assert {"origin", "renku-backup-origin"} == {r.name for r in project_with_remote.remotes} - assert remote_url == project_with_remote.remotes["renku-backup-origin"].url - assert project_with_remote.remotes["origin"].url.startswith(f"https://{ENDPOINT}/repo") - assert not project_with_remote.remotes["origin"].url.startswith(f"https://{ENDPOINT}/repo/repo") + assert {"origin", "renku-backup-origin"} == {r.name for r in project_with_remote.repository.remotes} + assert remote_url == project_with_remote.repository.remotes["renku-backup-origin"].url + assert project_with_remote.repository.remotes["origin"].url.startswith(f"https://{ENDPOINT}/repo") + assert not project_with_remote.repository.remotes["origin"].url.startswith(f"https://{ENDPOINT}/repo/repo") def test_logout_git(runner, project_with_remote, mock_login): """Test logout removes backup remotes and restores original remote url.""" - remote_url = project_with_remote.remotes[0].url + remote_url = project_with_remote.repository.remotes[0].url assert 0 == runner.invoke(cli, ["login", "--git", "--yes", ENDPOINT]).exit_code result = runner.invoke(cli, ["logout"]) assert 0 == result.exit_code, format_result_exception(result) - assert {"origin"} == {r.name for r in project_with_remote.remotes} - assert remote_url == project_with_remote.remotes["origin"].url + assert {"origin"} == {r.name for r in project_with_remote.repository.remotes} + assert remote_url == project_with_remote.repository.remotes["origin"].url try: - credential = project_with_remote.get_configuration(scope="local").remove_value("credential", "helper") + credential = project_with_remote.repository.get_configuration(scope="local").remove_value( + "credential", "helper" + ) except errors.GitConfigurationError: # NOTE: If already logged out, ``git config --unset`` raises an exception credential = None assert credential is None @@ -276,7 +278,7 @@ def test_token_no_credential(runner, project_with_remote, mock_login): assert "password=\n" in result.output -def test_token_invalid_command(runner, project_with_remote, mock_login, client_database_injection_manager): +def test_token_invalid_command(runner, project_with_remote, mock_login, with_injection): """Test call credential helper with a command other than 'get'.""" assert 0 == runner.invoke(cli, ["login", ENDPOINT]).exit_code @@ -285,5 +287,5 @@ def test_token_invalid_command(runner, project_with_remote, mock_login, client_d assert 0 == result.exit_code, format_result_exception(result) assert "" == result.output - with client_database_injection_manager(project_with_remote): + with with_injection(): assert read_renku_token(ENDPOINT) is not None diff --git a/tests/cli/test_merge.py b/tests/cli/test_merge.py index c88b110c8f..3e1f259d71 100644 --- a/tests/cli/test_merge.py +++ b/tests/cli/test_merge.py @@ -17,7 +17,6 @@ # limitations under the License. 
"""Test ``move`` command.""" - from renku.core.dataset.datasets_provenance import DatasetsProvenance from renku.infrastructure.gateway.activity_gateway import ActivityGateway from renku.infrastructure.gateway.plan_gateway import PlanGateway @@ -26,7 +25,7 @@ from tests.utils import format_result_exception -def test_mergetool(runner, client, directory_tree, run_shell, client_database_injection_manager): +def test_mergetool(runner, project, directory_tree, run_shell, with_injection): """Test that merge tool can merge renku metadata.""" result = runner.invoke(cli, ["mergetool", "install"]) @@ -129,7 +128,7 @@ def test_mergetool(runner, client, directory_tree, run_shell, client_database_in assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): project_gateway = ProjectGateway() project = project_gateway.get_project() datasets_provenance = DatasetsProvenance() @@ -148,7 +147,7 @@ def test_mergetool(runner, client, directory_tree, run_shell, client_database_in assert "remote description" == shared_dataset.description -def test_mergetool_workflow_conflict(runner, client, run_shell, client_database_injection_manager): +def test_mergetool_workflow_conflict(runner, project, run_shell, with_injection): """Test that merge tool can merge conflicting workflows.""" result = runner.invoke(cli, ["mergetool", "install"]) @@ -175,7 +174,7 @@ def test_mergetool_workflow_conflict(runner, client, run_shell, client_database_ assert b"" == output[0] assert output[1] is None - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() remote_plans = plan_gateway.get_newest_plans_by_names() @@ -212,7 +211,7 @@ def test_mergetool_workflow_conflict(runner, client, run_shell, client_database_ assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): activity_gateway = ActivityGateway() activities = activity_gateway.get_all_activities() plan_gateway = PlanGateway() @@ -229,7 +228,7 @@ def test_mergetool_workflow_conflict(runner, client, run_shell, client_database_ result = runner.invoke(cli, ["workflow", "execute", "common-name"]) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): activity_gateway = ActivityGateway() activities = activity_gateway.get_all_activities() plan_gateway = PlanGateway() @@ -239,7 +238,7 @@ def test_mergetool_workflow_conflict(runner, client, run_shell, client_database_ assert len(plans) == 4 -def test_mergetool_workflow_complex_conflict(runner, client, run_shell, client_database_injection_manager): +def test_mergetool_workflow_complex_conflict(runner, project, run_shell, with_injection): """Test that merge tool can merge complex conflicts in workflows.""" result = runner.invoke(cli, ["mergetool", "install"]) @@ -264,7 +263,7 @@ def test_mergetool_workflow_complex_conflict(runner, client, run_shell, client_d assert b"" == output[0] - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() remote_plans = plan_gateway.get_newest_plans_by_names() @@ -282,7 +281,7 @@ def test_mergetool_workflow_complex_conflict(runner, client, run_shell, client_d assert b"" == output[0] - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() local_plans = plan_gateway.get_newest_plans_by_names() @@ -303,7 +302,7 @@ def 
test_mergetool_workflow_complex_conflict(runner, client, run_shell, client_d assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): activity_gateway = ActivityGateway() activities = activity_gateway.get_all_activities() plan_gateway = PlanGateway() diff --git a/tests/cli/test_migrate.py b/tests/cli/test_migrate.py index 2d485e6e02..a5858f24ea 100644 --- a/tests/cli/test_migrate.py +++ b/tests/cli/test_migrate.py @@ -26,13 +26,13 @@ from renku.core.constant import RENKU_HOME, RENKU_TMP from renku.core.dataset.datasets_provenance import DatasetsProvenance -from renku.core.management.migrate import SUPPORTED_PROJECT_VERSION, get_migrations +from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION, get_migrations from renku.domain_model.dataset import RemoteEntity from renku.domain_model.project_context import project_context from renku.infrastructure.gateway.dataset_gateway import DatasetGateway from renku.infrastructure.repository import Repository from renku.ui.cli import cli -from tests.utils import format_result_exception +from tests.utils import format_result_exception, get_dataset_with_injection, get_datasets_provenance_with_injection @pytest.mark.migration @@ -40,17 +40,17 @@ def test_migrate_datasets_with_old_repository(isolated_runner, old_project): """Test migrate on old repository.""" result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) - assert not old_project.is_dirty(untracked_files=True) + assert not old_project.repository.is_dirty(untracked_files=True) @pytest.mark.migration -def test_migrate_project(isolated_runner, old_project, with_injections_manager): +def test_migrate_project(isolated_runner, old_project, with_injection): """Test migrate on old repository.""" result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) - assert not old_project.is_dirty(untracked_files=True) + assert not old_project.repository.is_dirty(untracked_files=True) - with project_context.with_path(old_project.path), with_injections_manager(old_project): + with project_context.with_path(old_project.path), with_injection(): assert project_context.project assert project_context.project.name @@ -93,13 +93,13 @@ def test_migration_check(isolated_runner, project): @pytest.mark.migration -def test_correct_path_migrated(isolated_runner, old_project, client_database_injection_manager): +def test_correct_path_migrated(isolated_runner, old_project, with_injection): """Check if path on dataset files has been correctly migrated.""" result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) with project_context.with_path(old_project.path): - with client_database_injection_manager(old_project): + with with_injection(): datasets = DatasetGateway().get_all_active_datasets() assert datasets @@ -112,13 +112,13 @@ def test_correct_path_migrated(isolated_runner, old_project, client_database_inj @pytest.mark.migration -def test_correct_relative_path(isolated_runner, old_project, client_database_injection_manager): +def test_correct_relative_path(isolated_runner, old_project, with_injection): """Check if path on dataset has been correctly migrated.""" result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) with project_context.with_path(path=old_project.path): - with 
client_database_injection_manager(old_project): + with with_injection(): datasets_provenance = DatasetsProvenance() assert len(list(datasets_provenance.datasets)) > 0 @@ -127,21 +127,18 @@ def test_correct_relative_path(isolated_runner, old_project, client_database_inj @pytest.mark.migration def test_remove_committed_lock_file(isolated_runner, old_project): """Check that renku lock file has been successfully removed from git.""" - repo = old_project - repo_path = Path(old_project.path) - with open(str(repo_path / ".renku.lock"), "w") as f: - f.write("lock") + (old_project.path / ".renku.lock").write_text("lock") - repo.add(".renku.lock", force=True) - repo.commit("locked") + old_project.repository.add(".renku.lock", force=True) + old_project.repository.commit("locked") result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) - assert not (repo_path / ".renku.lock").exists() - assert not repo.is_dirty(untracked_files=True) + assert not (old_project.path / ".renku.lock").exists() + assert not old_project.repository.is_dirty(untracked_files=True) - ignored = (repo_path / ".gitignore").read_text() + ignored = (old_project.path / ".gitignore").read_text() assert ".renku.lock" in ignored @@ -168,7 +165,7 @@ def test_migrations_runs(isolated_runner, old_project): assert "No migrations required." in result.output tmp_path = os.path.join(RENKU_HOME, RENKU_TMP) - paths = [c.b_path for c in old_project.head.commit.get_changes() if tmp_path in c.b_path] + paths = [c.b_path for c in old_project.repository.head.commit.get_changes() if tmp_path in c.b_path] assert 0 == len(paths), ", ".join(paths) @@ -185,6 +182,7 @@ def test_migration_version(): @pytest.mark.migration def test_workflow_migration(isolated_runner, old_workflow_project): """Check that *.cwl workflows can be migrated.""" + _, expected_strings = old_workflow_project result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) @@ -193,22 +191,19 @@ def test_workflow_migration(isolated_runner, old_workflow_project): result = isolated_runner.invoke(cli, ["graph", "export", "--full"]) assert 0 == result.exit_code, format_result_exception(result) - for expected in old_workflow_project["expected_strings"]: + for expected in expected_strings: assert expected in result.output @pytest.mark.migration -def test_comprehensive_dataset_migration( - isolated_runner, old_dataset_project, load_dataset_with_injection, get_datasets_provenance_with_injection -): +def test_comprehensive_dataset_migration(isolated_runner, old_dataset_project): """Test migration of old project with all dataset variations.""" result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) assert "OK" in result.output - client = old_dataset_project - dataset = load_dataset_with_injection("dataverse", client) - with get_datasets_provenance_with_injection(client) as datasets_provenance: + dataset = get_dataset_with_injection("dataverse") + with get_datasets_provenance_with_injection() as datasets_provenance: tags = datasets_provenance.get_all_tags(dataset) assert "/datasets/1d2ed1e43aeb4f2590b238084ee3d86c" == dataset.id @@ -230,8 +225,8 @@ def test_comprehensive_dataset_migration( assert file_.based_on is None assert not hasattr(file_, "creators") - dataset = load_dataset_with_injection("mixed", client) - with get_datasets_provenance_with_injection(client) as datasets_provenance: + dataset = 
get_dataset_with_injection("mixed") + with get_datasets_provenance_with_injection() as datasets_provenance: tags = datasets_provenance.get_all_tags(dataset) assert "v1" == tags[0].name @@ -263,14 +258,12 @@ def test_comprehensive_dataset_migration( ], indirect=["old_dataset_project"], ) -def test_migrate_renku_dataset_same_as( - isolated_runner, old_dataset_project, load_dataset_with_injection, name, same_as -): +def test_migrate_renku_dataset_same_as(isolated_runner, old_dataset_project, name, same_as): """Test migration of imported renku datasets remove dashes from the same_as field.""" result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) - dataset = load_dataset_with_injection(name, old_dataset_project) + dataset = get_dataset_with_injection(name) assert same_as == dataset.same_as.value @@ -284,24 +277,22 @@ def test_migrate_renku_dataset_same_as( ], indirect=["old_dataset_project"], ) -def test_migrate_renku_dataset_derived_from( - isolated_runner, old_dataset_project, load_dataset_with_injection, name, derived_from -): +def test_migrate_renku_dataset_derived_from(isolated_runner, old_dataset_project, name, derived_from): """Test migration of datasets remove dashes from the derived_from field.""" result = isolated_runner.invoke(cli, ["migrate", "--strict"]) assert 0 == result.exit_code, format_result_exception(result) - dataset = load_dataset_with_injection(name, old_dataset_project) + dataset = get_dataset_with_injection(name) assert derived_from == dataset.derived_from.url_id @pytest.mark.migration -def test_no_blank_node_after_dataset_migration(isolated_runner, old_dataset_project, load_dataset_with_injection): +def test_no_blank_node_after_dataset_migration(isolated_runner, old_dataset_project): """Test migration of datasets with blank nodes creates IRI identifiers.""" assert 0 == isolated_runner.invoke(cli, ["migrate", "--strict"]).exit_code - dataset = load_dataset_with_injection("2019-01_us_fligh_1", old_dataset_project) + dataset = get_dataset_with_injection("2019-01_us_fligh_1") assert not dataset.creators[0].id.startswith("_:") assert not dataset.same_as.id.startswith("_:") @@ -410,12 +401,12 @@ def test_commands_work_on_old_repository(isolated_runner, old_repository_with_su @pytest.mark.migration @pytest.mark.parametrize("name", ["mixed", "dataverse"]) -def test_migrate_can_preserve_dataset_ids(isolated_runner, old_dataset_project, load_dataset_with_injection, name): +def test_migrate_can_preserve_dataset_ids(isolated_runner, old_dataset_project, name): """Test migrate can preserve old datasets' ids.""" result = isolated_runner.invoke(cli, ["migrate", "--strict", "--preserve-identifiers"]) assert 0 == result.exit_code, format_result_exception(result) - dataset = load_dataset_with_injection(name, old_dataset_project) + dataset = get_dataset_with_injection(name) assert dataset.identifier == dataset.initial_identifier assert dataset.derived_from is None @@ -426,37 +417,31 @@ def test_migrate_can_preserve_dataset_ids(isolated_runner, old_dataset_project, @pytest.mark.migration -def test_migrate_preserves_creation_date_when_preserving_ids( - isolated_runner, old_dataset_project, load_dataset_with_injection -): +def test_migrate_preserves_creation_date_when_preserving_ids(isolated_runner, old_dataset_project): """Test migrate doesn't change dataset's dateCreated when --preserve-identifiers is passed.""" assert 0 == isolated_runner.invoke(cli, ["migrate", "--strict", "--preserve-identifiers"]).exit_code - 
dataset = load_dataset_with_injection("mixed", old_dataset_project) + dataset = get_dataset_with_injection("mixed") assert "2020-08-10 21:35:20+00:00" == dataset.date_created.isoformat(" ") @pytest.mark.migration @pytest.mark.parametrize("old_dataset_project", ["old-datasets-v0.16.0.git"], indirect=True) -def test_migrate_preserves_creation_date_for_mutated_datasets( - isolated_runner, old_dataset_project, load_dataset_with_injection -): +def test_migrate_preserves_creation_date_for_mutated_datasets(isolated_runner, old_dataset_project): """Test migration of datasets that were mutated keeps original dateCreated.""" assert 0 == isolated_runner.invoke(cli, ["migrate", "--strict"]).exit_code - dataset = load_dataset_with_injection("local", old_dataset_project) + dataset = get_dataset_with_injection("local") assert "2021-07-23 14:34:58+00:00" == dataset.date_created.isoformat(" ") @pytest.mark.migration -def test_migrate_sets_correct_creation_date_for_non_mutated_datasets( - isolated_runner, old_dataset_project, load_dataset_with_injection -): +def test_migrate_sets_correct_creation_date_for_non_mutated_datasets(isolated_runner, old_dataset_project): """Test migration of datasets that weren't mutated uses commit date as dateCreated.""" assert 0 == isolated_runner.invoke(cli, ["migrate", "--strict"]).exit_code - dataset = load_dataset_with_injection("mixed", old_dataset_project) + dataset = get_dataset_with_injection("mixed") assert "2020-08-10 23:35:56+02:00" == dataset.date_created.isoformat(" ") diff --git a/tests/cli/test_move.py b/tests/cli/test_move.py index 595224a84e..1b4e7cbcba 100644 --- a/tests/cli/test_move.py +++ b/tests/cli/test_move.py @@ -24,12 +24,11 @@ import pytest from renku.core.constant import DEFAULT_DATA_DIR as DATA_DIR -from renku.domain_model.project_context import project_context from renku.ui.cli import cli -from tests.utils import format_result_exception +from tests.utils import format_result_exception, get_dataset_with_injection -def test_move(runner, client): +def test_move(runner, project): """Test move of files.""" src1 = Path("src1") / "sub" / "src1.txt" src1.parent.mkdir(parents=True, exist_ok=True) @@ -37,8 +36,8 @@ def test_move(runner, client): src2 = Path("src2") / "sub" / "src2.txt" src2.parent.mkdir(parents=True, exist_ok=True) src2.touch() - project_context.repository.add(all=True) - project_context.repository.commit("Add some files") + project.repository.add(all=True) + project.repository.commit("Add some files") result = runner.invoke(cli, ["mv", "-v", "src1", "src2", "dst/sub"]) @@ -56,7 +55,7 @@ def test_move(runner, client): assert 0 == result.exit_code, format_result_exception(result) -def test_move_outside_paths(runner, client, directory_tree): +def test_move_outside_paths(runner, project, directory_tree): """Test move from/to outside paths is not possible.""" result = runner.invoke(cli, ["mv", str(directory_tree), "data"]) @@ -69,7 +68,7 @@ def test_move_outside_paths(runner, client, directory_tree): assert f"Error: Invalid parameter value - Path '{directory_tree}' is outside the project" in result.output -def test_move_non_existing_sources(runner, client): +def test_move_non_existing_sources(runner, project): """Test move from non-existing sources is not possible.""" result = runner.invoke(cli, ["mv", "non-existing", "data"]) @@ -78,7 +77,7 @@ def test_move_non_existing_sources(runner, client): @pytest.mark.parametrize("path", [".renku", ".renku/metadata/root", ".gitignore", "Dockerfile"]) -def test_move_protected_paths(runner, client, path): 
+def test_move_protected_paths(runner, project, path):
     """Test move from/to protected paths is not possible."""
     result = runner.invoke(cli, ["mv", path, "README.md"])
@@ -91,11 +90,11 @@
     assert f"Invalid parameter value - Path '{path}' is protected." in result.output


-def test_move_existing_destination(runner, client):
+def test_move_existing_destination(runner, project):
     """Test move to existing destination."""
-    (project_context.path / "source").write_text("123")
-    project_context.repository.add(all=True)
-    project_context.repository.commit("source file")
+    (project.path / "source").write_text("123")
+    project.repository.add(all=True)
+    project.repository.commit("source file")

     result = runner.invoke(cli, ["mv", "source", "README.md"])
@@ -112,7 +111,7 @@
     assert "123" == Path("README.md").read_text()


-def test_move_to_ignored_file(runner, client):
+def test_move_to_ignored_file(runner, project):
     """Test move to an ignored pattern."""
     result = runner.invoke(cli, ["mv", "README.md", "ignored.so"])
@@ -121,9 +120,9 @@
     assert "ignored.so" in result.output


-def test_move_empty_source(runner, client):
+def test_move_empty_source(runner, project):
     """Test move from empty directory."""
-    (project_context.path / "empty").mkdir()
+    (project.path / "empty").mkdir()

     result = runner.invoke(cli, ["mv", "empty", "data"])
@@ -131,13 +130,13 @@
     assert "Invalid parameter value - There are no files to move" in result.output


-def test_move_dataset_file(runner, project_with_datasets, directory_tree_files, load_dataset_with_injection):
+def test_move_dataset_file(runner, project_with_datasets, directory_tree_files):
     """Test move of a file that belongs to a dataset."""
     for path in directory_tree_files:
         src = Path("data") / "dataset-2" / path
         assert src.exists()

-    dataset_before = load_dataset_with_injection("dataset-2", project_with_datasets)
+    dataset_before = get_dataset_with_injection("dataset-2")

     result = runner.invoke(cli, ["mv", "data", "files"], input="y", catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
@@ -147,7 +146,7 @@
     assert 0 == result.exit_code, format_result_exception(result)

     # Check immutability
-    dataset_after = load_dataset_with_injection("dataset-2", project_with_datasets)
+    dataset_after = get_dataset_with_injection("dataset-2")
     assert dataset_before.id != dataset_after.id
     assert dataset_before.identifier != dataset_after.identifier
@@ -162,19 +161,19 @@


 @pytest.mark.parametrize("args", [[], ["--to-dataset", "dataset-2"]])
-def test_move_in_the_same_dataset(runner, project_with_datasets, args, load_dataset_with_injection):
+def test_move_in_the_same_dataset(runner, project_with_datasets, args):
     """Test move and overwrite a file in the same dataset."""
     src = os.path.join("data", "dataset-2", "file1")
     dst = os.path.join("data", "dataset-2", "dir1", "file2")
-    dataset = load_dataset_with_injection("dataset-2", project_with_datasets)
+    dataset = get_dataset_with_injection("dataset-2")
     file_before = dataset.find_file(dst)

     result = runner.invoke(cli, ["mv", "-f", src, dst] + args)
     assert 0 == result.exit_code, format_result_exception(result)

-    dataset = load_dataset_with_injection("dataset-2", project_with_datasets)
+    dataset = get_dataset_with_injection("dataset-2")
     assert {dst, dst.replace("file2", "file3")} == {f.entity.path for f in dataset.files}
-    assert not (project_context.path / src).exists()
+    assert not (project_with_datasets.path / src).exists()
     file_after = dataset.find_file(dst)
     assert file_after.entity.checksum != file_before.entity.checksum
     assert dst == file_after.entity.path
@@ -182,14 +181,14 @@
     result = runner.invoke(cli, ["doctor"], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
-    assert not project_with_datasets.is_dirty(untracked_files=True)
+    assert not project_with_datasets.repository.is_dirty(untracked_files=True)


-def test_move_to_existing_destination_in_a_dataset(runner, project_with_datasets, load_dataset_with_injection):
+def test_move_to_existing_destination_in_a_dataset(runner, project_with_datasets):
     """Test move to a file in dataset will update file's metadata."""
-    (project_context.path / "source").write_text("new-content")
-    project_with_datasets.add(all=True)
-    project_with_datasets.commit("source file")
+    (project_with_datasets.path / "source").write_text("new-content")
+    project_with_datasets.repository.add(all=True)
+    project_with_datasets.repository.commit("source file")

     result = runner.invoke(cli, ["mv", "-f", "--to-dataset", "dataset-2", "source", "new_file"])
     assert 2 == result.exit_code, format_result_exception(result)
@@ -197,13 +196,13 @@
     dst = os.path.join("data", "dataset-2", "file1")

-    dataset_before = load_dataset_with_injection("dataset-2", project_with_datasets)
+    dataset_before = get_dataset_with_injection("dataset-2")
     file_before = dataset_before.find_file(dst)

     result = runner.invoke(cli, ["mv", "-f", "source", dst])
     assert 0 == result.exit_code, format_result_exception(result)

-    dataset_after = load_dataset_with_injection("dataset-2", project_with_datasets)
+    dataset_after = get_dataset_with_injection("dataset-2")
     file_after = dataset_after.find_file(dst)

     # Check dataset immutability
@@ -217,7 +216,7 @@
     result = runner.invoke(cli, ["doctor"], catch_exceptions=False)
     assert 0 == result.exit_code, format_result_exception(result)
-    assert not project_with_datasets.is_dirty(untracked_files=True)
+    assert not project_with_datasets.repository.is_dirty(untracked_files=True)


 @pytest.mark.parametrize(
@@ -230,9 +229,7 @@
         os.path.join(DATA_DIR, "dataset", "subdir", "subdir", "destination"),
     ),
 )
-def test_move_external_files(
-    data_repository, runner, client, destination, directory_tree, directory_tree_files, load_dataset_with_injection
-):
+def test_move_external_files(data_repository, runner, project, destination, directory_tree, directory_tree_files):
     """Test move of external files (symlinks)."""
     assert 0 == runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-dataset", str(directory_tree)]).exit_code
@@ -245,7 +242,7 @@
         assert dst.is_symlink()
         assert directory_tree / path == dst.resolve()

-        file = load_dataset_with_injection("my-dataset", client).find_file(dst)
+        file = get_dataset_with_injection("my-dataset").find_file(dst)
         assert file
         assert str(dst) in file.entity.id
         assert file.is_external
@@ -253,12 +250,10 @@ def test_move_external_files(
     result = runner.invoke(cli, ["doctor"], catch_exceptions=False)
     assert 0 == result.exit_code, result.output
-    assert not project_context.repository.is_dirty(untracked_files=True)
+    assert not project.repository.is_dirty(untracked_files=True)


-def test_move_between_datasets(
-    runner, client, directory_tree, large_file, directory_tree_files, load_dataset_with_injection
-):
+def test_move_between_datasets(runner, project, directory_tree, large_file, directory_tree_files):
     """Test move files between datasets."""
     shutil.copy(large_file, directory_tree / "file1")
     shutil.copy(large_file, directory_tree / "dir1" / "file2")
@@ -275,10 +270,10 @@
     assert 0 == result.exit_code, format_result_exception(result)
     assert not source.exists()

-    assert 0 == len(load_dataset_with_injection("dataset-1", client).files)
-    assert 1 == len(load_dataset_with_injection("dataset-2", client).files)
+    assert 0 == len(get_dataset_with_injection("dataset-1").files)
+    assert 1 == len(get_dataset_with_injection("dataset-2").files)

-    dataset = load_dataset_with_injection("dataset-3", client)
+    dataset = get_dataset_with_injection("dataset-3")
     assert 3 == len(dataset.files)

     for path in directory_tree_files:
@@ -294,17 +289,15 @@
     assert 0 == runner.invoke(cli, ["mv", "-f", src1, dst1, "--to-dataset", "dataset-1"]).exit_code
     src2 = os.path.join("data", "dataset-3", directory_tree.name, "file1")
     dst2 = os.path.join("data", "dataset-2")
-    (project_context.path / dst2).mkdir(parents=True, exist_ok=True)
+    (project.path / dst2).mkdir(parents=True, exist_ok=True)
     result = runner.invoke(cli, ["mv", src2, dst2, "--force", "--to-dataset", "dataset-2"])
     assert 0 == result.exit_code, format_result_exception(result)

     assert {"data/dataset-1/file2", "data/dataset-1/file3"} == {
-        f.entity.path for f in load_dataset_with_injection("dataset-1", client).files
-    }
-    assert {"data/dataset-2/file1"} == {f.entity.path for f in load_dataset_with_injection("dataset-2", client).files}
-    assert {"data/dataset-3/large-file"} == {
-        f.entity.path for f in load_dataset_with_injection("dataset-3", client).files
+        f.entity.path for f in get_dataset_with_injection("dataset-1").files
     }
+    assert {"data/dataset-2/file1"} == {f.entity.path for f in get_dataset_with_injection("dataset-2").files}
+    assert {"data/dataset-3/large-file"} == {f.entity.path for f in get_dataset_with_injection("dataset-3").files}

     assert 0 == runner.invoke(cli, ["doctor"], catch_exceptions=False).exit_code
-    assert not project_context.repository.is_dirty(untracked_files=True)
+    assert not project.repository.is_dirty(untracked_files=True)
diff --git a/tests/cli/test_output_option.py b/tests/cli/test_output_option.py
index 586b8e6782..d1b2b61b36 100644
--- a/tests/cli/test_output_option.py
+++ b/tests/cli/test_output_option.py
@@ -20,14 +20,13 @@
 import os
 from pathlib import Path

-from renku.domain_model.project_context import project_context
 from renku.ui.cli import cli
 from tests.utils import format_result_exception, write_and_commit_file


-def test_run_succeeds_normally(renku_cli, client, subdirectory):
+def test_run_succeeds_normally(renku_cli, project, subdirectory):
     """Test when an output is detected."""
-    foo = os.path.relpath(project_context.path / "foo", os.getcwd())
+    foo = os.path.relpath(project.path / "foo", os.getcwd())
     exit_code, activity = renku_cli("run", "touch", foo)

     assert 0 == exit_code
@@ -45,9 +44,9 @@ def test_when_no_change_in_outputs_is_detected(renku_cli, subdirectory):
     assert 1 == exit_code


-def test_with_no_output_option(renku_cli, client, subdirectory):
+def test_with_no_output_option(renku_cli, project, subdirectory):
     """Test --no-output option with no output detection."""
-    foo = os.path.relpath(project_context.path / "foo", os.getcwd())
+    foo = os.path.relpath(project.path / "foo", os.getcwd())
     renku_cli("run", "touch", foo)

     exit_code, activity = renku_cli("run", "--no-output", "touch", foo)
@@ -58,13 +57,13 @@
     assert 0 == len(plan.outputs)


-def test_explicit_outputs_and_normal_outputs(renku_cli, client, subdirectory):
+def test_explicit_outputs_and_normal_outputs(renku_cli, project, subdirectory):
     """Test explicit outputs and normal outputs can co-exist."""
-    foo = os.path.relpath(project_context.path / "foo", os.getcwd())
+    foo = os.path.relpath(project.path / "foo", os.getcwd())
     os.mkdir(foo)
-    bar = os.path.relpath(project_context.path / "bar", os.getcwd())
+    bar = os.path.relpath(project.path / "bar", os.getcwd())
     renku_cli("run", "touch", bar)
-    baz = os.path.relpath(project_context.path / "baz", os.getcwd())
+    baz = os.path.relpath(project.path / "baz", os.getcwd())
     qux = os.path.join(foo, "qux")

     exit_code, activity = renku_cli("run", "--output", foo, "--output", bar, "touch", baz, qux)
@@ -76,7 +75,7 @@
     assert {"foo", "bar", "baz", "foo/qux"} == {str(o.default_value) for o in plan.outputs}


-def test_explicit_outputs_and_std_output_streams(renku_cli, client, subdirectory):
+def test_explicit_outputs_and_std_output_streams(renku_cli, project, subdirectory):
     """Test that unchanged std output streams can be marked with explicit outputs."""
     exit_code, _ = renku_cli("run", "echo", "foo", stdout="bar")
     assert 0 == exit_code
@@ -88,25 +87,25 @@
     assert 0 == exit_code


-def test_output_directory_with_output_option(runner, renku_cli, client, subdirectory):
+def test_output_directory_with_output_option(runner, renku_cli, project, subdirectory):
     """Test output directories are not deleted with --output."""
-    outdir = os.path.relpath(project_context.path / "outdir", os.getcwd())
+    out_dir = os.path.relpath(project.path / "out-dir", os.getcwd())
     a_script = ("sh", "-c", 'mkdir -p "$0"; touch "$0/$1"')
-    assert 0 == runner.invoke(cli, ["run", *a_script, outdir, "foo"]).exit_code
-    result = runner.invoke(cli, ["run", "--output", outdir, *a_script, outdir, "bar"], catch_exceptions=False)
+    assert 0 == runner.invoke(cli, ["run", *a_script, out_dir, "foo"]).exit_code
+    result = runner.invoke(cli, ["run", "--output", out_dir, *a_script, out_dir, "bar"], catch_exceptions=False)
     assert 0 == result.exit_code

-    assert (project_context.path / "outdir" / "foo").exists()
-    assert (project_context.path / "outdir" / "bar").exists()
+    assert (project.path / "out-dir" / "foo").exists()
+    assert (project.path / "out-dir" / "bar").exists()


-def test_output_directory_without_separate_outputs(renku_cli, client):
+def test_output_directory_without_separate_outputs(renku_cli, project):
     """Test output files not listed as separate outputs.
    See https://github.com/SwissDataScienceCenter/renku-python/issues/387
     """
     a_script = ("sh", "-c", 'mkdir -p "$0"; touch "$0/$1"')
-    exit_code, activity = renku_cli("run", *a_script, "outdir", "foo")
+    exit_code, activity = renku_cli("run", *a_script, "out-dir", "foo")

     assert 0 == exit_code
     assert 1 == len(activity.association.plan.outputs)
@@ -161,9 +160,9 @@ def test_explicit_outputs_duplicate_name(renku_cli):
     assert 2 == exit_code


-def test_explicit_inputs_and_outputs_are_listed(renku_cli, client):
+def test_explicit_inputs_and_outputs_are_listed(renku_cli, project):
     """Test explicit inputs and outputs will be in generated CWL file."""
-    foo = Path(os.path.relpath(project_context.path / "foo", os.getcwd()))
+    foo = Path(os.path.relpath(project.path / "foo", os.getcwd()))
     foo.mkdir()
     renku_cli("run", "touch", "foo/file")
     renku_cli("run", "touch", "bar", "baz")
@@ -186,9 +185,9 @@
     assert "baz" == plan.outputs[0].default_value


-def test_explicit_inputs_and_outputs_are_listed_with_names(renku_cli, client):
+def test_explicit_inputs_and_outputs_are_listed_with_names(renku_cli, project):
     """Test explicit inputs and outputs will be in generated CWL file."""
-    foo = Path(os.path.relpath(project_context.path / "foo", os.getcwd()))
+    foo = Path(os.path.relpath(project.path / "foo", os.getcwd()))
     foo.mkdir()
     renku_cli("run", "touch", "foo/file")
     renku_cli("run", "touch", "bar", "baz")
@@ -217,9 +216,9 @@
     assert "my-output1" == plan.outputs[0].name


-def test_explicit_inputs_can_be_in_inputs(renku_cli, client, subdirectory):
+def test_explicit_inputs_can_be_in_inputs(renku_cli, project, subdirectory):
     """Test explicit inputs that are in inputs are treated as normal inputs."""
-    foo = os.path.relpath(project_context.path / "foo", os.getcwd())
+    foo = os.path.relpath(project.path / "foo", os.getcwd())
     renku_cli("run", "touch", foo)

     exit_code, activity = renku_cli("run", "--input", foo, "--no-output", "ls", foo)
@@ -233,7 +232,7 @@
     assert plan.inputs[0].position is not None


-def test_explicit_inputs_in_subdirectories(client, runner):
+def test_explicit_inputs_in_subdirectories(project, runner):
     """Test explicit inputs that are in sub-dirs are made accessible."""
     # Set up a script with hard dependency
     assert 0 == runner.invoke(cli, ["run", "--no-output", "mkdir", "foo"]).exit_code
@@ -246,7 +245,7 @@
     assert 0 == result.exit_code, format_result_exception(result)

     # Status must be dirty if foo/bar changes
-    write_and_commit_file(project_context.repository, project_context.path / "foo" / "bar", "new changes")
+    write_and_commit_file(project.repository, project.path / "foo" / "bar", "new changes")
     assert 0 == result.exit_code, format_result_exception(result)
@@ -256,13 +255,13 @@
     result = runner.invoke(cli, ["update", "--all"])
     assert 0 == result.exit_code, format_result_exception(result)

-    assert (project_context.path / "foo" / "bar").exists()
-    assert (project_context.path / "script.sh").exists()
-    assert (project_context.path / "output").exists()
+    assert (project.path / "foo" / "bar").exists()
+    assert (project.path / "script.sh").exists()
+    assert (project.path / "output").exists()


 def test_no_explicit_or_detected_output(renku_cli):
-    """Test output detection is disbaled and no explicit output is passed."""
+    """Test output detection is disabled and no explicit output is passed."""
     exit_code, _ = renku_cli("run", "--no-output-detection", "echo")

     assert 1 == exit_code
@@ -291,7 +290,7 @@
     assert "README.md" == str(plan.outputs[0].default_value)


-def test_inputs_must_be_passed_with_no_detection(renku_cli, client):
+def test_inputs_must_be_passed_with_no_detection(renku_cli, project):
     """Test when detection is disabled, inputs must be explicitly passed."""
     exit_code, activity = renku_cli(
         "run", "--no-input-detection", "--input", "Dockerfile", "--no-output", "ls", "-l", "README.md", "Dockerfile"
@@ -304,9 +303,9 @@
     assert "Dockerfile" == str(plan.inputs[0].default_value)


-def test_overlapping_explicit_outputs(renku_cli, client):
+def test_overlapping_explicit_outputs(renku_cli, project):
     """Test explicit outputs are not removed even if they overlap."""
-    foo = Path(os.path.relpath(project_context.path / "foo", os.getcwd()))
+    foo = Path(os.path.relpath(project.path / "foo", os.getcwd()))
     foo.mkdir()
     renku_cli("run", "touch", "foo/bar")
@@ -447,9 +446,9 @@
     assert "test" == str(plan.parameters[0].default_value)


-def test_explicit_parameter_with_same_input(renku_cli, client):
+def test_explicit_parameter_with_same_input(renku_cli, project):
     """Test explicit parameter can coexist with output of same name."""
-    foo = Path(os.path.relpath(project_context.path / "foo", os.getcwd()))
+    foo = Path(os.path.relpath(project.path / "foo", os.getcwd()))
     foo.mkdir()
     renku_cli("run", "touch", "test")
     exit_code, activity = renku_cli("run", "--param", "test", "cat", "test", stdout="target")
@@ -465,9 +464,9 @@
     assert "test" == str(plan.parameters[0].default_value)


-def test_explicit_parameter_with_same_explicit_input(renku_cli, client):
+def test_explicit_parameter_with_same_explicit_input(renku_cli, project):
     """Test explicit parameter can coexist with output of same name."""
-    foo = Path(os.path.relpath(project_context.path / "foo", os.getcwd()))
+    foo = Path(os.path.relpath(project.path / "foo", os.getcwd()))
     foo.mkdir()
     renku_cli("run", "touch", "test")
     exit_code, activity = renku_cli("run", "--param", "test", "--input", "test", "cat", "test", stdout="target")
diff --git a/tests/cli/test_project.py b/tests/cli/test_project.py
index f53948ebd6..cc0b835719 100644
--- a/tests/cli/test_project.py
+++ b/tests/cli/test_project.py
@@ -19,14 +19,13 @@
 import json

-from renku.domain_model.project_context import project_context
 from renku.domain_model.provenance.agent import Person
 from renku.infrastructure.gateway.project_gateway import ProjectGateway
 from renku.ui.cli import cli
 from tests.utils import format_result_exception


-def test_project_show(runner, client, subdirectory, client_database_injection_manager):
+def test_project_show(runner, project, subdirectory):
     """Check showing project metadata."""
     result = runner.invoke(cli, ["project", "show"])
@@ -37,9 +36,9 @@
     assert "Renku Version:" in result.output


-def test_project_edit(runner, client, subdirectory, client_database_injection_manager):
+def test_project_edit(runner, project, subdirectory, with_injection):
     """Check project metadata editing."""
-    (project_context.path / "README.md").write_text("Make repo dirty.")
+    (project.path / "README.md").write_text("Make repo dirty.")

     creator = "Forename Surname [Affiliation]"
@@ -48,10 +47,10 @@
         "@type": "https://schema.org/specialType",
         "https://schema.org/specialProperty": "some_unique_value",
     }
-    metadata_path = project_context.path / "metadata.json"
+    metadata_path = project.path / "metadata.json"
     metadata_path.write_text(json.dumps(metadata))

-    commit_sha_before = project_context.repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha

     result = runner.invoke(
         cli,
@@ -75,7 +74,11 @@
     assert "Successfully updated: creator, description, keywords, custom_metadata." in result.output
     assert "Warning: No email or wrong format for: Forename Surname" in result.output

-    with client_database_injection_manager(client):
+    assert project.repository.is_dirty(untracked_files=True)
+    commit_sha_after = project.repository.head.commit.hexsha
+    assert commit_sha_before != commit_sha_after
+
+    with with_injection():
         project_gateway = ProjectGateway()
         project = project_gateway.get_project()
@@ -86,10 +89,6 @@
     assert metadata == project.annotations[0].body
     assert {"keyword1", "keyword2"} == set(project.keywords)

-    assert project_context.repository.is_dirty(untracked_files=True)
-    commit_sha_after = project_context.repository.head.commit.hexsha
-    assert commit_sha_before != commit_sha_after
-
     result = runner.invoke(cli, ["project", "show"])

     assert 0 == result.exit_code, format_result_exception(result)
@@ -103,25 +102,25 @@
     assert 0 == result.exit_code, format_result_exception(result)


-def test_project_edit_no_change(runner, client):
+def test_project_edit_no_change(runner, project):
     """Check project metadata editing does not commit when there is no change."""
-    (project_context.path / "README.md").write_text("Make repo dirty.")
+    (project.path / "README.md").write_text("Make repo dirty.")

-    commit_sha_before = project_context.repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha

     result = runner.invoke(cli, ["project", "edit"], catch_exceptions=False)

     assert 0 == result.exit_code, format_result_exception(result)
     assert "Nothing to update." in result.output

-    commit_sha_after = project_context.repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha
     assert commit_sha_after == commit_sha_before
-    assert project_context.repository.is_dirty(untracked_files=True)
+    assert project.repository.is_dirty(untracked_files=True)


-def test_project_edit_unset(runner, client, subdirectory, client_database_injection_manager):
+def test_project_edit_unset(runner, project, subdirectory, with_injection):
     """Check project metadata editing."""
-    (project_context.path / "README.md").write_text("Make repo dirty.")
+    (project.path / "README.md").write_text("Make repo dirty.")

     creator = "Forename Surname [Affiliation]"
@@ -130,7 +129,7 @@
         "@type": "https://schema.org/specialType",
         "https://schema.org/specialProperty": "some_unique_value",
     }
-    metadata_path = project_context.path / "metadata.json"
+    metadata_path = project.path / "metadata.json"
     metadata_path.write_text(json.dumps(metadata))

     result = runner.invoke(
@@ -155,7 +154,7 @@
     assert "Successfully updated: creator, description, keywords, custom_metadata." in result.output
     assert "Warning: No email or wrong format for: Forename Surname" in result.output

-    commit_sha_before = project_context.repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha

     result = runner.invoke(
         cli,
@@ -165,17 +164,17 @@
     assert 0 == result.exit_code, format_result_exception(result)
     assert "Successfully updated: keywords, custom_metadata." in result.output

-    with client_database_injection_manager(client):
+    assert project.repository.is_dirty(untracked_files=True)
+    commit_sha_after = project.repository.head.commit.hexsha
+    assert commit_sha_before != commit_sha_after
+
+    with with_injection():
         project_gateway = ProjectGateway()
         project = project_gateway.get_project()

     assert not project.annotations
     assert not project.keywords

-    assert project_context.repository.is_dirty(untracked_files=True)
-    commit_sha_after = project_context.repository.head.commit.hexsha
-    assert commit_sha_before != commit_sha_after
-
     result = runner.invoke(cli, ["project", "show"])

     assert 0 == result.exit_code, format_result_exception(result)
diff --git a/tests/cli/test_remove.py b/tests/cli/test_remove.py
index 3deac69f6a..6ffbf608df 100644
--- a/tests/cli/test_remove.py
+++ b/tests/cli/test_remove.py
@@ -20,37 +20,34 @@
 import pytest

 from renku.core.constant import DEFAULT_DATA_DIR as DATA_DIR
-from renku.domain_model.project_context import project_context
 from renku.ui.cli import cli
 from tests.utils import format_result_exception


-@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/testing"), (["--datadir", "mydir"], "mydir")])
-def test_remove_dataset_file(isolated_runner, client, tmpdir, subdirectory, datadir_option, datadir):
+@pytest.mark.parametrize("datadir_option,datadir", [([], f"{DATA_DIR}/testing"), (["--datadir", "my-dir"], "my-dir")])
+def test_remove_dataset_file(isolated_runner, project, tmpdir, subdirectory, datadir_option, datadir):
     """Test remove of a file that belongs to a dataset."""
-    runner = isolated_runner
-    # create a dataset
-    result = runner.invoke(cli, ["dataset", "create", "testing"] + datadir_option)
+    result = isolated_runner.invoke(cli, ["dataset", "create", "testing"] + datadir_option)
     assert 0 == result.exit_code, format_result_exception(result)
     assert "OK" in result.output

     source = tmpdir.join("remove_dataset.file")
     source.write(DATA_DIR)
-    result = runner.invoke(cli, ["dataset", "add", "--copy", "testing", source.strpath])
+    result = isolated_runner.invoke(cli, ["dataset", "add", "--copy", "testing", source.strpath])
     assert 0 == result.exit_code, format_result_exception(result)

-    path = project_context.path / datadir / "remove_dataset.file"
+    path = project.path / datadir / "remove_dataset.file"
     assert path.exists()

-    result = runner.invoke(cli, ["doctor"])
+    result = isolated_runner.invoke(cli, ["doctor"])
     assert 0 == result.exit_code, format_result_exception(result)

-    result = runner.invoke(cli, ["rm", str(project_context.path / datadir)])
+    result = isolated_runner.invoke(cli, ["rm", str(project.path / datadir)])
     assert 0 == result.exit_code, format_result_exception(result)

     assert not path.exists()

-    result = runner.invoke(cli, ["doctor"])
+    result = isolated_runner.invoke(cli, ["doctor"])
     assert 0 == result.exit_code, format_result_exception(result)
diff --git a/tests/cli/test_rerun.py b/tests/cli/test_rerun.py
index d2a9c3c6f0..9119297b6a 100644
--- a/tests/cli/test_rerun.py
+++ b/tests/cli/test_rerun.py
@@ -25,7 +25,6 @@
 import pytest

 from renku.core.plugin.provider import available_workflow_providers
-from renku.domain_model.project_context import project_context
 from renku.infrastructure.gateway.activity_gateway import ActivityGateway
 from renku.infrastructure.gateway.plan_gateway import PlanGateway
 from renku.ui.cli import cli
@@ -34,7 +33,7 @@

 @pytest.mark.parametrize("provider", available_workflow_providers())
 @pytest.mark.parametrize("skip_metadata_update", [True, False])
-def test_rerun(project, client, client_database_injection_manager, renku_cli, provider, skip_metadata_update):
+def test_rerun(project, with_injection, renku_cli, provider, skip_metadata_update):
     """Test rerun."""
     output = project.path / "output.txt"
@@ -48,7 +47,7 @@ def rerun():
             cmd.append("--skip-metadata-update")
         cmd.append(output)
         assert 0 == renku_cli(*cmd).exit_code
-        with client_database_injection_manager(client):
+        with with_injection():
             plans = PlanGateway().get_all_plans()
             activities = ActivityGateway().get_all_activities()
             assert len(plans) == 1
@@ -112,9 +111,9 @@ def rerun():

 @pytest.mark.parametrize("provider", available_workflow_providers())
 @pytest.mark.parametrize("source, content", [("input1", "input1 new-input2 old"), ("input2", "input1 old-input2 new")])
-def test_rerun_with_from(repository, renku_cli, provider, source, content):
+def test_rerun_with_from(project, renku_cli, provider, source, content):
     """Test file recreation with specified inputs."""
-    cwd = repository.path
+    cwd = project.path
     input1 = cwd / "input1"
     input2 = cwd / "input2"
     intermediate1 = cwd / "intermediate1"
@@ -123,8 +122,8 @@
     final2 = cwd / "final2"
     output = cwd / "output"

-    write_and_commit_file(repository, input1, "input1 old-")
-    write_and_commit_file(repository, input2, "input2 old")
+    write_and_commit_file(project.repository, input1, "input1 old-")
+    write_and_commit_file(project.repository, input2, "input2 old")

     assert 0 == renku_cli("run", "cp", input1, intermediate1).exit_code
     assert 0 == renku_cli("run", "cp", input2, intermediate2).exit_code
@@ -134,16 +133,16 @@
     assert 0 == renku_cli("run", "cat", final1, final2, stdout=output).exit_code

     # Update both inputs
-    write_and_commit_file(repository, input1, "input1 new-")
-    write_and_commit_file(repository, input2, "input2 new")
+    write_and_commit_file(project.repository, input1, "input1 new-")
+    write_and_commit_file(project.repository, input2, "input2 new")

-    commit_sha_before = repository.head.commit.hexsha
+    commit_sha_before = project.repository.head.commit.hexsha

     assert 0 == renku_cli("rerun", "-p", provider, "--from", source, output).exit_code

     assert content == output.read_text()

-    commit_sha_after = repository.head.commit.hexsha
+    commit_sha_after = project.repository.head.commit.hexsha
     assert commit_sha_before != commit_sha_after
@@ -197,10 +196,10 @@ def test_rerun_with_edited_inputs(project, run, no_lfs_warning, runner):

 @pytest.mark.parametrize("provider", available_workflow_providers())
-def test_rerun_with_no_execution(repository, runner, provider):
+def test_rerun_with_no_execution(project, runner, provider):
     """Test rerun when no workflow is executed."""
-    input = os.path.join(repository.path, "data", "input.txt")
-    write_and_commit_file(repository, input, "content")
+    input = os.path.join(project.path, "data", "input.txt")
+    write_and_commit_file(project.repository, input, "content")

     result = runner.invoke(cli, ["rerun", "-p", provider, input], catch_exceptions=False)
@@ -209,9 +208,9 @@

 @pytest.mark.parametrize("provider", available_workflow_providers())
-def test_output_directory(runner, repository, run, no_lfs_size_limit, provider):
+def test_output_directory(runner, project, run, no_lfs_size_limit, provider):
     """Test detection of output directory."""
-    cwd = repository.path
+    cwd = project.path
     data = cwd / "source" / "data.txt"
     source = data.parent
     source.mkdir(parents=True)
@@ -225,8 +224,8 @@
     invalid_destination.mkdir(parents=True)
     (invalid_destination / "non_empty").touch()

-    repository.add(all=True)
-    repository.commit("Created source directory", no_verify=True)
+    project.repository.add(all=True)
+    project.repository.commit("Created source directory", no_verify=True)

     cmd = ["run", "cp", "-LRf", str(source), str(destination)]
     result = runner.invoke(cli, cmd, catch_exceptions=False)
@@ -259,13 +258,13 @@
     assert not (invalid_destination / data.name).exists()


-def test_rerun_overridden_output(repository, renku_cli, runner):
+def test_rerun_overridden_output(project, renku_cli, runner):
     """Test a path where final output is overridden won't be rerun."""
-    a = os.path.join(repository.path, "a")
-    b = os.path.join(repository.path, "b")
-    c = os.path.join(repository.path, "c")
+    a = os.path.join(project.path, "a")
+    b = os.path.join(project.path, "b")
+    c = os.path.join(project.path, "c")

-    write_and_commit_file(repository, a, "content")
+    write_and_commit_file(project.repository, a, "content")

     assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
     time.sleep(1)
@@ -281,14 +280,14 @@
     assert "r3" in result.output


-def test_rerun_overridden_outputs_partially(repository, renku_cli, runner):
+def test_rerun_overridden_outputs_partially(project, renku_cli, runner):
     """Test a path where one of the final output is overridden won't be rerun."""
-    a = os.path.join(repository.path, "a")
-    b = os.path.join(repository.path, "b")
-    c = os.path.join(repository.path, "c")
-    d = os.path.join(repository.path, "d")
+    a = os.path.join(project.path, "a")
+    b = os.path.join(project.path, "b")
+    c = os.path.join(project.path, "c")
+    d = os.path.join(project.path, "d")

-    write_and_commit_file(repository, a, "content")
+    write_and_commit_file(project.repository, a, "content")

     assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
     time.sleep(1)
@@ -315,14 +314,14 @@
     assert 0 == result.exit_code, format_result_exception(result)


-def test_rerun_multiple_paths_common_output(repository, renku_cli, runner):
+def test_rerun_multiple_paths_common_output(project, renku_cli, runner):
     """Test when multiple paths generate the same output only the most recent path will be rerun."""
-    a = os.path.join(repository.path, "a")
-    b = os.path.join(repository.path, "b")
-    c = os.path.join(repository.path, "c")
-    d = os.path.join(repository.path, "d")
+    a = os.path.join(project.path, "a")
+    b = os.path.join(project.path, "b")
+    c = os.path.join(project.path, "c")
+    d = os.path.join(project.path, "d")

-    write_and_commit_file(repository, a, "content")
+    write_and_commit_file(project.repository, a, "content")

     assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code
     time.sleep(1)
@@ -341,16 +340,16 @@
     assert "r4" in result.output


-def test_rerun_output_in_subdirectory(runner, client):
+def test_rerun_output_in_subdirectory(runner, project):
     """Test re-run when an output is in a sub-directory."""
-    output = project_context.path / "sub-dir" / "output"
-    write_and_commit_file(project_context.repository, output, "")
+    output = project.path / "sub-dir" / "output"
+    write_and_commit_file(project.repository, output, "")

     result = runner.invoke(cli, ["run", "bash", "-c", 'touch "$0" ; echo data > "$0"', output])

     assert 0 == result.exit_code, format_result_exception(result)

-    write_and_commit_file(project_context.repository, output, "")
+    write_and_commit_file(project.repository, output, "")

     result = runner.invoke(cli, ["rerun", output])
diff --git a/tests/cli/test_rollback.py b/tests/cli/test_rollback.py
index 04e71b7168..2d04a77f70 100644
--- a/tests/cli/test_rollback.py
+++ b/tests/cli/test_rollback.py
@@ -15,14 +15,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Test ``log`` command.""" +"""Test ``rollback`` command.""" -from renku.domain_model.project_context import project_context from renku.ui.cli import cli from tests.utils import format_result_exception -def test_rollback(client, runner, project): +def test_rollback(runner, project): """Test renku rollback.""" result = runner.invoke(cli, ["run", "--name", "run1", "touch", "foo"]) assert 0 == result.exit_code, format_result_exception(result) @@ -30,11 +29,11 @@ def test_rollback(client, runner, project): result = runner.invoke(cli, ["run", "--name", "run2", "cp", "foo", "bar"]) assert 0 == result.exit_code, format_result_exception(result) - metadata_path = project_context.path / "input" + metadata_path = project.path / "input" metadata_path.write_text("input") - project_context.repository.add(["input"]) - project_context.repository.commit("add input") + project.repository.add("input") + project.repository.commit("add input") result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) assert 0 == result.exit_code, format_result_exception(result) @@ -50,8 +49,8 @@ def test_rollback(client, runner, project): metadata_path.write_text("changed input") - project_context.repository.add(["input"]) - project_context.repository.commit("change input") + project.repository.add("input") + project.repository.commit("change input") result = runner.invoke(cli, ["run", "--name", "run3", "cp", "input", "output"]) assert 0 == result.exit_code, format_result_exception(result) diff --git a/tests/cli/test_run.py b/tests/cli/test_run.py index 0235214c33..5b2d26cc0b 100644 --- a/tests/cli/test_run.py +++ b/tests/cli/test_run.py @@ -22,7 +22,6 @@ import pytest -from renku.domain_model.project_context import project_context from renku.domain_model.workflow.plan import Plan from renku.infrastructure.gateway.activity_gateway import ActivityGateway from renku.infrastructure.gateway.plan_gateway import PlanGateway @@ -43,14 +42,14 @@ def test_run_simple(runner, project): assert "test" in result.output -def test_run_many_args(client, run): +def test_run_many_args(project, run): """Test a renku run command which implicitly relies on many inputs.""" os.mkdir("files") output = "output.txt" for i in range(103): os.system("touch files/{}.txt".format(i)) - project_context.repository.add("files/") - project_context.repository.commit("add many files") + project.repository.add("files/") + project.repository.commit("add many files") exit_code = run(args=("run", "ls", "files/"), stdout=output) assert 0 == exit_code @@ -76,7 +75,7 @@ def test_run_clean(runner, project, run_shell): @pytest.mark.serial @pytest.mark.shelled -def test_run_external_command_file(runner, client, project, run_shell, client_database_injection_manager): +def test_run_external_command_file(runner, project, run_shell, with_injection): """Test tracking of run command in clean repo.""" # Run a shell command with pipe. 
output = run_shell('renku run $(which echo) "a unique string" > my_output_file') @@ -91,14 +90,14 @@ def test_run_external_command_file(runner, client, project, run_shell, client_da assert "my_output_file" in result.output assert "a unique string" in result.output - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() plan = plan_gateway.get_all_plans()[0] assert plan.command assert plan.command.endswith("/echo") -def test_run_metadata(renku_cli, runner, client, client_database_injection_manager): +def test_run_metadata(renku_cli, runner, project, with_injection): """Test run with workflow metadata.""" exit_code, activity = renku_cli( "run", "--name", "run-1", "--description", "first run", "--keyword", "key1", "--keyword", "key2", "touch", "foo" @@ -110,7 +109,7 @@ def test_run_metadata(renku_cli, runner, client, client_database_injection_manag assert "first run" == plan.description assert {"key1", "key2"} == set(plan.keywords) - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() plan = plan_gateway.get_by_id(plan.id) assert "run-1" == plan.name @@ -121,7 +120,7 @@ def test_run_metadata(renku_cli, runner, client, client_database_injection_manag assert 0 == result.exit_code, format_result_exception(result) -def test_run_with_outside_files(renku_cli, runner, client, client_database_injection_manager, tmpdir): +def test_run_with_outside_files(renku_cli, runner, project, with_injection, tmpdir): """Test run with files that are outside the project.""" external_file = tmpdir.join("file_1") @@ -133,7 +132,7 @@ def test_run_with_outside_files(renku_cli, runner, client, client_database_injec plan = activity.association.plan assert "run-1" == plan.name - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() plan = cast(Plan, plan_gateway.get_by_id(plan.id)) assert "run-1" == plan.name @@ -153,27 +152,27 @@ def test_run_with_outside_files(renku_cli, runner, client, client_database_injec (["echo", "-n", "some long value"], "echo--n-some_long_v-"), ], ) -def test_generated_run_name(runner, client, command, name, client_database_injection_manager): +def test_generated_run_name(runner, project, command, name, with_injection): """Test generated run name.""" result = runner.invoke(cli, ["run", "--no-output"] + command) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() plan = plan_gateway.get_all_plans()[0] assert name == plan.name[:-5] -def test_run_invalid_name(runner, client): +def test_run_invalid_name(runner, project): """Test run with invalid name.""" result = runner.invoke(cli, ["run", "--name", "invalid name", "touch", "foo"]) assert 2 == result.exit_code - assert not (project_context.path / "foo").exists() + assert not (project.path / "foo").exists() assert "Invalid name: 'invalid name' (Hint: 'invalid_name' is valid)." 
in result.output -def test_run_argument_parameters(runner, client, client_database_injection_manager): +def test_run_argument_parameters(runner, project, with_injection): """Test names and values of workflow/provenance arguments and parameters.""" result = runner.invoke( cli, @@ -194,7 +193,7 @@ def test_run_argument_parameters(runner, client, client_database_injection_manag ) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() plans = plan_gateway.get_all_plans() assert 1 == len(plans) @@ -232,7 +231,7 @@ def test_run_argument_parameters(runner, client, client_database_injection_manag assert 0 == result.exit_code, format_result_exception(result) -def test_run_non_existing_command(runner, client): +def test_run_non_existing_command(runner, project): """Test run with a non-existing command.""" result = runner.invoke(cli, ["run", "non-existing_command"]) @@ -240,7 +239,7 @@ def test_run_non_existing_command(runner, client): assert "Cannot execute command 'non-existing_command'" in result.output -def test_run_prints_plan(runner, client): +def test_run_prints_plan(runner, project): """Test run shows the generated plan with --verbose.""" result = runner.invoke(cli, ["run", "--verbose", "--name", "echo-command", "--no-output", "echo", "data"]) @@ -249,32 +248,32 @@ def test_run_prints_plan(runner, client): assert "Name:" not in result.stdout -def test_run_prints_plan_when_stdout_redirected(runner, client): +def test_run_prints_plan_when_stdout_redirected(runner, project): """Test run shows the generated plan in stderr if stdout is redirected to a file.""" result = runner.invoke(cli, ["run", "--verbose", "--name", "echo-command", "echo", "data"], stdout="output") assert 0 == result.exit_code, format_result_exception(result) assert "Name: echo-command" in result.stderr assert "Name:" not in result.stdout - assert "Name:" not in (project_context.path / "output").read_text() + assert "Name:" not in (project.path / "output").read_text() -def test_run_prints_plan_when_stderr_redirected(runner, client): +def test_run_prints_plan_when_stderr_redirected(runner, project): """Test run shows the generated plan in stdout if stderr is redirected to a file.""" result = runner.invoke(cli, ["run", "--verbose", "--name", "echo-command", "echo", "data"], stderr="output") assert 0 == result.exit_code, format_result_exception(result) - assert "Name: echo-command" in (project_context.path / "output").read_text() + assert "Name: echo-command" in (project.path / "output").read_text() assert "Name:" not in result.output -def test_run_with_external_files(runner, client, directory_tree): +def test_run_with_external_files(runner, project, directory_tree): """Test run commands that use external files.""" assert 0 == runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-dataset", directory_tree]).exit_code - path = project_context.path / "data" / "my-dataset" / "directory_tree" / "file1" + path = project.path / "data" / "my-dataset" / "directory_tree" / "file1" result = runner.invoke(cli, ["run", "tail", path], stdout="output") assert 0 == result.exit_code, format_result_exception(result) - assert "file1" in (project_context.path / "output").read_text() + assert "file1" in (project.path / "output").read_text() diff --git a/tests/cli/test_save.py b/tests/cli/test_save.py index 643cdfee64..b0f09204cf 100644 --- a/tests/cli/test_save.py +++ b/tests/cli/test_save.py @@ -19,15 +19,14 @@ import os -from 
renku.domain_model.project_context import project_context from renku.infrastructure.repository import Repository from renku.ui.cli import cli from tests.utils import format_result_exception, write_and_commit_file -def test_save_without_remote(runner, project, client, tmpdir_factory): +def test_save_without_remote(runner, project, tmpdir_factory): """Test saving local changes.""" - with (project_context.path / "tracked").open("w") as fp: + with (project.path / "tracked").open("w") as fp: fp.write("tracked file") result = runner.invoke(cli, ["save", "-m", "save changes", "tracked"], catch_exceptions=False) @@ -41,41 +40,41 @@ def test_save_without_remote(runner, project, client, tmpdir_factory): assert 0 == result.exit_code, format_result_exception(result) assert "tracked" in result.output - assert "Saved changes to: tracked" in project_context.repository.head.commit.message + assert "Saved changes to: tracked" in project.repository.head.commit.message - project_context.repository.remotes.remove("origin") + project.repository.remotes.remove("origin") -def test_save_with_remote(runner, project, project_with_remote): +def test_save_with_remote(runner, project_with_remote): """Test saving local changes.""" - with (project_context.path / "tracked").open("w") as fp: + with (project_with_remote.path / "tracked").open("w") as fp: fp.write("tracked file") result = runner.invoke(cli, ["save", "-m", "save changes", "tracked"], catch_exceptions=False) assert 0 == result.exit_code, format_result_exception(result) assert "tracked" in result.output - assert "save changes" in project_with_remote.head.commit.message + assert "save changes" in project_with_remote.repository.head.commit.message def test_save_with_merge_conflict(runner, project, project_with_remote): """Test saving local changes.""" - branch = project_with_remote.active_branch.name - with (project_context.path / "tracked").open("w") as fp: + branch = project_with_remote.repository.active_branch.name + with (project_with_remote.path / "tracked").open("w") as fp: fp.write("tracked file") result = runner.invoke(cli, ["save", "-m", "save changes", "tracked"], catch_exceptions=False) assert 0 == result.exit_code, format_result_exception(result) assert "tracked" in result.output - assert "save changes" in project_context.repository.head.commit.message + assert "save changes" in project_with_remote.repository.head.commit.message - with (project_context.path / "tracked").open("w") as fp: + with (project_with_remote.path / "tracked").open("w") as fp: fp.write("local changes") - project_context.repository.add(project_context.path / "tracked") - project_context.repository.commit("amended commit", amend=True) + project_with_remote.repository.add(project_with_remote.path / "tracked") + project_with_remote.repository.commit("amended commit", amend=True) - with (project_context.path / "tracked").open("w") as fp: + with (project_with_remote.path / "tracked").open("w") as fp: fp.write("new version") result = runner.invoke(cli, ["save", "-m", "save changes", "tracked"], input="n", catch_exceptions=False) @@ -84,27 +83,27 @@ def test_save_with_merge_conflict(runner, project, project_with_remote): assert "There were conflicts when updating the local data" in result.output assert "Successfully saved to remote branch" in result.output assert branch in result.output - assert "save changes" in project_context.repository.head.commit.message + assert "save changes" in project_with_remote.repository.head.commit.message -def test_save_with_staged(runner, project, 
project_with_remote): +def test_save_with_staged(runner, project_with_remote): """Test saving local changes.""" - write_and_commit_file(project_context.repository, project_context.path / "deleted", "deleted file") - os.remove(project_context.path / "deleted") + write_and_commit_file(project_with_remote.repository, project_with_remote.path / "deleted", "deleted file") + os.remove(project_with_remote.path / "deleted") - (project_context.path / "tracked").write_text("tracked file") + (project_with_remote.path / "tracked").write_text("tracked file") - (project_context.path / "untracked").write_text("untracked file") + (project_with_remote.path / "untracked").write_text("untracked file") - project_context.repository.add("tracked", "deleted") + project_with_remote.repository.add("tracked", "deleted") result = runner.invoke(cli, ["save", "-m", "save changes", "modified", "deleted"], catch_exceptions=False) assert 1 == result.exit_code assert "These files are in the git staging area, but " in result.output assert "tracked" in result.output - assert "tracked" in [f.a_path for f in project_context.repository.staged_changes] - assert "untracked" in project_context.repository.untracked_files + assert "tracked" in [f.a_path for f in project_with_remote.repository.staged_changes] + assert "untracked" in project_with_remote.repository.untracked_files result = runner.invoke( cli, ["save", "-m", "save changes", "tracked", "untracked", "deleted"], catch_exceptions=False @@ -112,6 +111,6 @@ def test_save_with_staged(runner, project, project_with_remote): assert 0 == result.exit_code, format_result_exception(result) assert {"tracked", "untracked", "deleted"} == { - f.a_path for f in project_context.repository.head.commit.get_changes() + f.a_path for f in project_with_remote.repository.head.commit.get_changes() } - assert not project_context.repository.is_dirty(untracked_files=True) + assert not project_with_remote.repository.is_dirty(untracked_files=True) diff --git a/tests/cli/test_service.py b/tests/cli/test_service.py index 957e0d9c8c..a7f2066a1d 100644 --- a/tests/cli/test_service.py +++ b/tests/cli/test_service.py @@ -16,6 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Test ``service`` command.""" + from pathlib import Path from time import sleep diff --git a/tests/cli/test_status.py b/tests/cli/test_status.py index ad05ff2b63..68a6cf3179 100644 --- a/tests/cli/test_status.py +++ b/tests/cli/test_status.py @@ -23,12 +23,12 @@ from tests.utils import format_result_exception, write_and_commit_file -def test_status_exit_codes(runner, repository, subdirectory): +def test_status_exit_codes(runner, project, subdirectory): """Test status check returns 0 when up-to-date and 1 otherwise.""" - source = os.path.join(repository.path, "source.txt") - output = os.path.join(repository.path, "data", "output.txt") + source = os.path.join(project.path, "source.txt") + output = os.path.join(project.path, "data", "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") result = runner.invoke(cli, ["run", "cp", source, output]) assert 0 == result.exit_code, format_result_exception(result) @@ -40,7 +40,7 @@ def test_status_exit_codes(runner, repository, subdirectory): assert 0 == result.exit_code, format_result_exception(result) - write_and_commit_file(repository, source, "new content") + write_and_commit_file(project.repository, source, "new content") result = runner.invoke(cli, ["status"]) diff --git a/tests/cli/test_template.py b/tests/cli/test_template.py index 1f1be2d2a9..414486de64 100644 --- a/tests/cli/test_template.py +++ b/tests/cli/test_template.py @@ -89,7 +89,7 @@ def test_template_show(isolated_runner): sys.argv = argv -def test_template_show_no_id(runner, client): +def test_template_show_no_id(runner, project): """Test show current project's template.""" result = runner.invoke(cli, ["template", "show"]) @@ -128,24 +128,24 @@ def test_template_update_outside_a_renku_project(isolated_runner): assert "is not a renku repository" in result.output -def test_template_set_failure(runner, client, client_database_injection_manager): +def test_template_set_failure(runner, project, with_injection): """Test setting template in a project with template fails.""" result = runner.invoke(cli, ["template", "set"]) assert 1 == result.exit_code, format_result_exception(result) assert "Project already has a template" in result.output - with client_database_injection_manager(client): + with with_injection(): assert "python-minimal" == project_context.project.template_id -def test_template_set(runner, client, client_database_injection_manager): +def test_template_set(runner, project, with_injection): """Test setting a new template in a project.""" from renku.version import __template_version__ result = runner.invoke(cli, ["template", "set", "--force", "R-minimal"]) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): assert "R-minimal" == project_context.project.template_id assert __template_version__ == project_context.project.template_version assert __template_version__ == project_context.project.template_ref @@ -154,62 +154,62 @@ def test_template_set(runner, client, client_database_injection_manager): assert 0 == result.exit_code, format_result_exception(result) -def test_template_set_overwrites_modified(runner, repository, client_database_injection_manager): +def test_template_set_overwrites_modified(runner, project, with_injection): """Test setting a new template in a project overwrite modified files.""" - write_and_commit_file(repository, "Dockerfile", "my-modifications") + write_and_commit_file(project.repository, 
"Dockerfile", "my-modifications") result = runner.invoke(cli, ["template", "set", "--force", "R-minimal"]) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(repository): + with with_injection(): assert "R-minimal" == project_context.project.template_id - assert "my-modifications" not in (repository.path / "Dockerfile").read_text() - assert not repository.is_dirty(untracked_files=True) + assert "my-modifications" not in (project.path / "Dockerfile").read_text() + assert not project.repository.is_dirty(untracked_files=True) @pytest.mark.parametrize("overwrite, found", [["y", False], ["n", True]]) -def test_template_set_interactive(runner, repository, client_database_injection_manager, overwrite, found): +def test_template_set_interactive(runner, project, with_injection, overwrite, found): """Test setting a template in interactive mode.""" - write_and_commit_file(repository, "Dockerfile", "my-modifications") + write_and_commit_file(project.repository, "Dockerfile", "my-modifications") result = runner.invoke(cli, ["template", "set", "-f", "R-minimal", "-i"], input=f"{overwrite}\n" * 420) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(repository): + with with_injection(): assert "R-minimal" == project_context.project.template_id - assert ("my-modifications" in (repository.path / "Dockerfile").read_text()) is found - assert not repository.is_dirty(untracked_files=True) + assert ("my-modifications" in (project.path / "Dockerfile").read_text()) is found + assert not project.repository.is_dirty(untracked_files=True) -def test_template_set_preserve_renku_version(runner, repository): +def test_template_set_preserve_renku_version(runner, project): """Test setting a template and overwriting Dockerfile still preserves Renku version.""" - content = (repository.path / "Dockerfile").read_text() + content = (project.path / "Dockerfile").read_text() new_content = re.sub(r"^\s*ARG RENKU_VERSION=(.+)$", "ARG RENKU_VERSION=0.0.42", content, flags=re.MULTILINE) - write_and_commit_file(repository, "Dockerfile", new_content) + write_and_commit_file(project.repository, "Dockerfile", new_content) result = runner.invoke(cli, ["template", "set", "-f", "R-minimal", "--interactive"], input="y\n" * 420) assert 0 == result.exit_code, format_result_exception(result) - content = (repository.path / "Dockerfile").read_text() + content = (project.path / "Dockerfile").read_text() assert new_content != content assert "ARG RENKU_VERSION=0.0.42" in content -def test_template_set_dry_run(runner, repository): +def test_template_set_dry_run(runner, project): """Test set dry-run doesn't make any changes.""" - commit_sha_before = repository.head.commit.hexsha + commit_sha_before = project.repository.head.commit.hexsha result = runner.invoke(cli, ["template", "set", "-f", "R-minimal", "--dry-run"]) assert 0 == result.exit_code, format_result_exception(result) - assert not repository.is_dirty() - assert commit_sha_before == repository.head.commit.hexsha + assert not project.repository.is_dirty() + assert commit_sha_before == project.repository.head.commit.hexsha @pytest.mark.integration -def test_template_update(runner, client, client_database_injection_manager): +def test_template_update(runner, project, with_injection): """Test updating a template.""" result = runner.invoke( cli, @@ -218,7 +218,7 @@ def test_template_update(runner, client, client_database_injection_manager): ) assert 0 == result.exit_code, 
format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): assert "python-minimal" == project_context.project.template_id assert "0.3.2" == project_context.project.template_ref assert "b9ab266fba136bdecfa91dc8d7b6d36b9d427012" == project_context.project.template_version @@ -227,7 +227,7 @@ def test_template_update(runner, client, client_database_injection_manager): assert 0 == result.exit_code, format_result_exception(result) assert "Template is up-to-date" not in result.output - with client_database_injection_manager(client): + with with_injection(): assert "python-minimal" == project_context.project.template_id assert Version(project_context.project.template_ref) > Version("0.3.2") assert "6c59d8863841baeca8f30062fd16c650cf67da3b" != project_context.project.template_version @@ -241,7 +241,7 @@ def test_template_update(runner, client, client_database_injection_manager): assert 0 == result.exit_code, format_result_exception(result) -def test_template_update_latest_version(runner, client): +def test_template_update_latest_version(runner, project): """Test updating a template that is the latest version.""" result = runner.invoke(cli, ["template", "update"]) @@ -250,17 +250,17 @@ @pytest.mark.integration -def test_template_update_missing_repo(runner, client_with_template): +def test_template_update_missing_repo(runner, project_with_template): """Test update with a non-existing template repository fails with expected error.""" result = runner.invoke(cli, ["template", "update"]) assert 1 == result.exit_code assert "Template cannot be fetched" in result.output - assert not client_with_template.is_dirty() + assert not project_with_template.repository.is_dirty() @pytest.mark.integration -def test_template_update_dry_run(runner, repository): +def test_template_update_dry_run(runner, project): """Test update dry-run doesn't make any changes.""" result = runner.invoke( cli, @@ -270,20 +270,20 @@ assert 0 == result.exit_code, format_result_exception(result) - commit_sha_before = repository.head.commit.hexsha + commit_sha_before = project.repository.head.commit.hexsha result = runner.invoke(cli, ["template", "update", "--dry-run"]) assert 0 == result.exit_code, format_result_exception(result) - assert not repository.is_dirty() - assert commit_sha_before == repository.head.commit.hexsha + assert not project.repository.is_dirty() + assert commit_sha_before == project.repository.head.commit.hexsha -def test_git_hook_for_modified_immutable_template_files(runner, client_with_template): +def test_git_hook_for_modified_immutable_template_files(runner, project_with_template): """Test check for modified immutable template files.""" - (client_with_template.path / "immutable.file").write_text("Locally modified immutable files") + (project_with_template.path / "immutable.file").write_text("Locally modified immutable files") - with chdir(client_with_template.path): + with chdir(project_with_template.path): result = runner.invoke(cli, ["check-immutable-template-files", "Dockerfile"]) assert result.exit_code == 0, result.output @@ -292,9 +292,7 @@ def test_git_hook_for_modified_immutable_template_files(runner, client_with_temp assert "immutable.file" in result.output -def test_template_update_with_parameters( - runner, client_with_template, templates_source, client_database_injection_manager -): +def test_template_update_with_parameters(runner,
project_with_template, templates_source, with_injection): """Test update prompts for new template parameters.""" parameter = TemplateParameter(name="new-parameter", description="", type="", possible_values=[], default=None) templates_source.update(id="dummy", version="2.0.0", parameters=[parameter]) @@ -303,15 +301,13 @@ def test_template_update_with_parameters( assert result.exit_code == 0, result.output - with client_database_injection_manager(client_with_template): + with with_injection(): template_metadata = TemplateMetadata.from_project(project=project_context.project) assert "new-parameter" in template_metadata.metadata assert "new-value" == template_metadata.metadata["new-parameter"] -def test_template_update_with_parameters_with_defaults( - runner, client_with_template, templates_source, client_database_injection_manager -): +def test_template_update_with_parameters_with_defaults(runner, project_with_template, templates_source, with_injection): """Test update doesn't prompt for new template parameters with default value.""" parameter = TemplateParameter(name="new-parameter", description="", type="", possible_values=[], default="def-val") templates_source.update(id="dummy", version="2.0.0", parameters=[parameter]) @@ -320,15 +316,13 @@ def test_template_update_with_parameters_with_defaults( assert result.exit_code == 0, result.output - with client_database_injection_manager(client_with_template): + with with_injection(): template_metadata = TemplateMetadata.from_project(project=project_context.project) assert "new-parameter" in template_metadata.metadata assert "def-val" == template_metadata.metadata["new-parameter"] -def test_template_set_with_parameters( - runner, client_with_template, templates_source, client_database_injection_manager -): +def test_template_set_with_parameters(runner, project_with_template, templates_source, with_injection): """Test template set doesn't prompt for new template parameters when passed on command line.""" parameter = TemplateParameter(name="new-parameter", description="", type="", possible_values=[], default=None) templates_source.update(id="dummy", version="2.0.0", parameters=[parameter]) @@ -337,7 +331,7 @@ def test_template_set_with_parameters( assert result.exit_code == 0, result.output - with client_database_injection_manager(client_with_template): + with with_injection(): template_metadata = TemplateMetadata.from_project(project=project_context.project) assert "new-parameter" in template_metadata.metadata assert "param-value" == template_metadata.metadata["new-parameter"] @@ -367,7 +361,7 @@ def test_template_validate(runner, tmpdir_factory): "id": "test", "name": "test", "description": "description", - "variables": {"some_string": {"description": "somestr desc", "type": "string"}}, + "variables": {"some_string": {"description": "some str desc", "type": "string"}}, } ], ) diff --git a/tests/cli/test_update.py b/tests/cli/test_update.py index e0b40d5e9c..663f21ffe6 100644 --- a/tests/cli/test_update.py +++ b/tests/cli/test_update.py @@ -34,17 +34,17 @@ @pytest.mark.parametrize("provider", available_workflow_providers()) @pytest.mark.parametrize("skip_metadata_update", [True, False]) -def test_update(runner, repository, renku_cli, client_database_injection_manager, provider, skip_metadata_update): +def test_update(runner, project, renku_cli, with_injection, provider, skip_metadata_update): """Test output is updated when source changes.""" - source = os.path.join(repository.path, "source.txt") - output = os.path.join(repository.path,
"output.txt") + source = os.path.join(project.path, "source.txt") + output = os.path.join(project.path, "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") exit_code, previous_activity = renku_cli("run", "head", "-1", source, stdout=output) assert 0 == exit_code - write_and_commit_file(repository, source, "changed content") + write_and_commit_file(project.repository, source, "changed content") cmd = ["update", "-p", provider, "--all"] if skip_metadata_update: @@ -63,7 +63,7 @@ def test_update(runner, repository, renku_cli, client_database_injection_manager result = runner.invoke(cli, ["status"]) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(repository): + with with_injection(): activity_gateway = ActivityGateway() activity_collections = activity_gateway.get_all_activity_collections() @@ -81,22 +81,20 @@ def test_update(runner, repository, renku_cli, client_database_injection_manager @pytest.mark.parametrize("provider", available_workflow_providers()) @pytest.mark.parametrize("skip_metadata_update", [True, False]) -def test_update_multiple_steps( - runner, repository, renku_cli, client_database_injection_manager, provider, skip_metadata_update -): +def test_update_multiple_steps(runner, project, renku_cli, with_injection, provider, skip_metadata_update): """Test update in a multi-step workflow.""" - source = os.path.join(repository.path, "source.txt") - intermediate = os.path.join(repository.path, "intermediate.txt") - output = os.path.join(repository.path, "output.txt") + source = os.path.join(project.path, "source.txt") + intermediate = os.path.join(project.path, "intermediate.txt") + output = os.path.join(project.path, "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") exit_code, activity1 = renku_cli("run", "cp", source, intermediate) assert 0 == exit_code exit_code, activity2 = renku_cli("run", "cp", intermediate, output) assert 0 == exit_code - write_and_commit_file(repository, source, "changed content") + write_and_commit_file(project.repository, source, "changed content") cmd = ["update", "-p", provider, "--all"] if skip_metadata_update: @@ -118,7 +116,7 @@ def test_update_multiple_steps( result = runner.invoke(cli, ["status"]) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(repository): + with with_injection(): activity_gateway = ActivityGateway() activity_collections = activity_gateway.get_all_activity_collections() @@ -132,20 +130,20 @@ def test_update_multiple_steps( @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_multiple_steps_with_path(runner, repository, renku_cli, provider): +def test_update_multiple_steps_with_path(runner, project, renku_cli, provider): """Test update in a multi-step workflow when a path is specified.""" - source = os.path.join(repository.path, "source.txt") - intermediate = os.path.join(repository.path, "intermediate.txt") - output = os.path.join(repository.path, "output.txt") + source = os.path.join(project.path, "source.txt") + intermediate = os.path.join(project.path, "intermediate.txt") + output = os.path.join(project.path, "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") exit_code, activity1 = renku_cli("run", "cp", source, intermediate) assert 0 == exit_code 
exit_code, _ = renku_cli("run", "cp", intermediate, output) assert 0 == exit_code - write_and_commit_file(repository, source, "changed content") + write_and_commit_file(project.repository, source, "changed content") exit_code, activity = renku_cli("update", "-p", provider, intermediate) @@ -164,19 +162,19 @@ def test_update_multiple_steps_with_path(runner, repository, renku_cli, provider @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_with_directory_paths(repository, renku_cli, provider): +def test_update_with_directory_paths(project, renku_cli, provider): """Test update when a directory path is specified.""" - data = os.path.join(repository.path, "data", "dataset", "my-data") + data = os.path.join(project.path, "data", "dataset", "my-data") Path(data).mkdir(parents=True, exist_ok=True) - source = os.path.join(repository.path, "source.txt") + source = os.path.join(project.path, "source.txt") output = os.path.join(data, "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") exit_code, previous_activity = renku_cli("run", "head", "-1", source, stdout=output) assert 0 == exit_code - write_and_commit_file(repository, source, "changed content") + write_and_commit_file(project.repository, source, "changed content") exit_code, activity = renku_cli("update", "-p", provider, data) @@ -187,26 +185,26 @@ def test_update_with_directory_paths(repository, renku_cli, provider): @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_multiple_updates(runner, repository, renku_cli, provider): +def test_multiple_updates(runner, project, renku_cli, provider): """Test multiple updates of the same source.""" - source = os.path.join(repository.path, "source.txt") - intermediate = os.path.join(repository.path, "intermediate.txt") - output = os.path.join(repository.path, "output.txt") + source = os.path.join(project.path, "source.txt") + intermediate = os.path.join(project.path, "intermediate.txt") + output = os.path.join(project.path, "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") exit_code, activity1 = renku_cli("run", "cp", source, intermediate) assert 0 == exit_code exit_code, activity2 = renku_cli("run", "cp", intermediate, output) assert 0 == exit_code - write_and_commit_file(repository, source, "changed content") + write_and_commit_file(project.repository, source, "changed content") exit_code, _ = renku_cli("update", "-p", provider, "--all") assert 0 == exit_code assert "changed content" == Path(intermediate).read_text() - write_and_commit_file(repository, source, "more changed content") + write_and_commit_file(project.repository, source, "more changed content") exit_code, activities = renku_cli("update", "-p", provider, "--all") @@ -225,17 +223,17 @@ def test_multiple_updates(runner, repository, renku_cli, provider): @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_workflow_without_outputs(runner, repository, run, provider): +def test_update_workflow_without_outputs(runner, project, run, provider): """Test workflow without outputs.""" - source = os.path.join(repository.path, "source.txt") + source = os.path.join(project.path, "source.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") result = runner.invoke(cli, ["run", "cat", "--no-output", source]) assert 0 == result.exit_code, 
format_result_exception(result) - write_and_commit_file(repository, source, "changes") + write_and_commit_file(project.repository, source, "changes") assert 1 == runner.invoke(cli, ["status"]).exit_code @@ -248,14 +246,14 @@ def test_update_workflow_without_outputs(runner, repository, run, provider): @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_siblings(repository, run, no_lfs_warning, provider): +def test_update_siblings(project, run, no_lfs_warning, provider): """Test that all generations of an activity are updated together.""" - parent = os.path.join(repository.path, "parent.txt") - brother = os.path.join(repository.path, "brother.txt") - sister = os.path.join(repository.path, "sister.txt") + parent = os.path.join(project.path, "parent.txt") + brother = os.path.join(project.path, "brother.txt") + sister = os.path.join(project.path, "sister.txt") siblings = [Path(brother), Path(sister)] - write_and_commit_file(repository, parent, "content") + write_and_commit_file(project.repository, parent, "content") assert 0 == run(args=["run", "tee", brother, sister], stdin=parent) @@ -263,7 +261,7 @@ def test_update_siblings(repository, run, no_lfs_warning, provider): for sibling in siblings: assert "content" == sibling.read_text() - write_and_commit_file(repository, parent, "changed content") + write_and_commit_file(project.repository, parent, "changed content") assert 0 == run(args=["update", "-p", provider, brother]) @@ -271,11 +269,11 @@ def test_update_siblings(repository, run, no_lfs_warning, provider): assert "changed content" == sibling.read_text() # Siblings kept together even when one is removed. - repository.remove(brother) - repository.commit("Brother removed") + project.repository.remove(brother) + project.repository.commit("Brother removed") assert not os.path.exists(brother) - write_and_commit_file(repository, parent, "more content") + write_and_commit_file(project.repository, parent, "more content") # Update should create the missing sibling assert 0 == run(args=["update", "-p", provider, "--all"]) @@ -285,14 +283,14 @@ def test_update_siblings(repository, run, no_lfs_warning, provider): @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_siblings_in_output_directory(repository, run, provider): +def test_update_siblings_in_output_directory(project, run, provider): """Files in the output directory are linked or removed after update.""" - source = os.path.join(repository.path, "source.txt") - output = Path(os.path.join(repository.path, "output")) # a directory + source = os.path.join(project.path, "source.txt") + output = Path(os.path.join(project.path, "output")) # a directory def write_source(): """Write source from files.""" - write_and_commit_file(repository, source, content="\n".join(" ".join(line) for line in files) + "\n") + write_and_commit_file(project.repository, source, content="\n".join(" ".join(line) for line in files) + "\n") def check_files(): """Check file content.""" @@ -322,13 +320,13 @@ def check_files(): @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_relative_path_for_directory_input(repository, run, renku_cli, provider): +def test_update_relative_path_for_directory_input(project, run, renku_cli, provider): """Test that having a directory input generates relative paths in CWL.""" - write_and_commit_file(repository, repository.path / DATA_DIR / "file1", "file1") + write_and_commit_file(project.repository, project.path / DATA_DIR / "file1", "file1") assert 0 ==
run(args=["run", "ls", DATA_DIR], stdout="ls.data") - write_and_commit_file(repository, repository.path / DATA_DIR / "file2", "file2") + write_and_commit_file(project.repository, project.path / DATA_DIR / "file2", "file2") exit_code, activity = renku_cli("update", "-p", provider, "--all") @@ -339,46 +337,46 @@ def test_update_relative_path_for_directory_input(repository, run, renku_cli, pr @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_no_args(runner, repository, no_lfs_warning, provider): +def test_update_no_args(runner, project, no_lfs_warning, provider): """Test calling update with no args raises ParameterError.""" - source = os.path.join(repository.path, "source.txt") - output = os.path.join(repository.path, "output.txt") + source = os.path.join(project.path, "source.txt") + output = os.path.join(project.path, "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") result = runner.invoke(cli, ["run", "cp", source, output]) assert 0 == result.exit_code, format_result_exception(result) - write_and_commit_file(repository, source, "changed content") + write_and_commit_file(project.repository, source, "changed content") - before_commit = repository.head.commit + before_commit = project.repository.head.commit result = runner.invoke(cli, ["update", "-p", provider]) assert 2 == result.exit_code assert "Either PATHS, --all/-a, or --dry-run/-n should be specified." in result.output - assert before_commit == repository.head.commit + assert before_commit == project.repository.head.commit @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_with_no_execution(repository, runner, provider): +def test_update_with_no_execution(project, runner, provider): """Test update when no workflow is executed.""" - input = os.path.join(repository.path, "data", "input.txt") - write_and_commit_file(repository, input, "content") + input = os.path.join(project.path, "data", "input.txt") + write_and_commit_file(project.repository, input, "content") result = runner.invoke(cli, ["update", "-p", provider, input], catch_exceptions=False) assert 1 == result.exit_code -def test_update_overridden_output(repository, renku_cli, runner): +def test_update_overridden_output(project, renku_cli, runner): """Test a path where final output is overridden will be updated partially.""" - a = os.path.join(repository.path, "a") - b = os.path.join(repository.path, "b") - c = os.path.join(repository.path, "c") + a = os.path.join(project.path, "a") + b = os.path.join(project.path, "b") + c = os.path.join(project.path, "c") - write_and_commit_file(repository, a, "content") + write_and_commit_file(project.repository, a, "content") assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code time.sleep(1) @@ -386,7 +384,7 @@ def test_update_overridden_output(repository, renku_cli, runner): time.sleep(1) assert 0 == renku_cli("run", "--name", "r3", "wc", a, stdout=c).exit_code - write_and_commit_file(repository, a, "new content") + write_and_commit_file(project.repository, a, "new content") result = runner.invoke(cli, ["update", "--dry-run"]) @@ -396,14 +394,14 @@ def test_update_overridden_output(repository, renku_cli, runner): assert "r3" in result.output -def test_update_overridden_outputs_partially(repository, renku_cli, runner): +def test_update_overridden_outputs_partially(project, renku_cli, runner): """Test a path where one of the final output is overridden will be updated 
completely but in proper order.""" - a = os.path.join(repository.path, "a") - b = os.path.join(repository.path, "b") - c = os.path.join(repository.path, "c") - d = os.path.join(repository.path, "d") + a = os.path.join(project.path, "a") + b = os.path.join(project.path, "b") + c = os.path.join(project.path, "c") + d = os.path.join(project.path, "d") - write_and_commit_file(repository, a, "content") + write_and_commit_file(project.repository, a, "content") assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code time.sleep(1) @@ -411,7 +409,7 @@ def test_update_overridden_outputs_partially(repository, renku_cli, runner): time.sleep(1) assert 0 == renku_cli("run", "--name", "r3", "wc", a, stdout=c).exit_code - write_and_commit_file(repository, a, "new content") + write_and_commit_file(project.repository, a, "new content") result = runner.invoke(cli, ["update", "--dry-run"]) @@ -422,14 +420,14 @@ def test_update_overridden_outputs_partially(repository, renku_cli, runner): assert result.output.find("r2") < result.output.find("r3") -def test_update_multiple_paths_common_output(repository, renku_cli, runner): +def test_update_multiple_paths_common_output(project, renku_cli, runner): """Test multiple paths that generate the same output will be updated except the last overridden step.""" - a = os.path.join(repository.path, "a") - b = os.path.join(repository.path, "b") - c = os.path.join(repository.path, "c") - d = os.path.join(repository.path, "d") + a = os.path.join(project.path, "a") + b = os.path.join(project.path, "b") + c = os.path.join(project.path, "c") + d = os.path.join(project.path, "d") - write_and_commit_file(repository, a, "content") + write_and_commit_file(project.repository, a, "content") assert 0 == runner.invoke(cli, ["run", "--name", "r1", "cp", a, b]).exit_code time.sleep(1) @@ -439,7 +437,7 @@ def test_update_multiple_paths_common_output(repository, renku_cli, runner): time.sleep(1) assert 0 == renku_cli("run", "--name", "r4", "wc", c, stdout=d).exit_code - write_and_commit_file(repository, a, "new content") + write_and_commit_file(project.repository, a, "new content") result = runner.invoke(cli, ["update", "--dry-run"]) @@ -451,7 +449,7 @@ def test_update_multiple_paths_common_output(repository, renku_cli, runner): @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_update_with_execute(runner, repository, renku_cli, provider): +def test_update_with_execute(runner, project, renku_cli, provider): """Test output is updated when source changes.""" source1 = Path("source.txt") output1 = Path("output.txt") @@ -459,9 +457,9 @@ def test_update_with_execute(runner, repository, renku_cli, provider): output2 = Path("output2.txt") script = Path("cp.sh") - write_and_commit_file(repository, source1, "content_a") - write_and_commit_file(repository, source2, "content_b") - write_and_commit_file(repository, script, "cp $1 $2") + write_and_commit_file(project.repository, source1, "content_a") + write_and_commit_file(project.repository, source2, "content_b") + write_and_commit_file(project.repository, script, "cp $1 $2") result = runner.invoke(cli, ["run", "--name", "test", "bash", str(script), str(source1), str(output1)]) assert 0 == result.exit_code, format_result_exception(result) @@ -473,13 +471,13 @@ def test_update_with_execute(runner, repository, renku_cli, provider): ).exit_code ) - assert "content_a" == (repository.path / output1).read_text() - assert "content_b" == (repository.path / output2).read_text() + assert "content_a" == 
(project.path / output1).read_text() + assert "content_b" == (project.path / output2).read_text() result = runner.invoke(cli, ["status"]) assert 0 == result.exit_code, format_result_exception(result) - write_and_commit_file(repository, script, "cp $1 $2\necho '_modified' >> $2") + write_and_commit_file(project.repository, script, "cp $1 $2\necho '_modified' >> $2") result = runner.invoke(cli, ["status"]) assert 1 == result.exit_code @@ -489,10 +487,10 @@ def test_update_with_execute(runner, repository, renku_cli, provider): result = runner.invoke(cli, ["status"]) assert 0 == result.exit_code - assert "content_a_modified\n" == (repository.path / output1).read_text() - assert "content_b_modified\n" == (repository.path / output2).read_text() + assert "content_a_modified\n" == (project.path / output1).read_text() + assert "content_b_modified\n" == (project.path / output2).read_text() - write_and_commit_file(repository, script, "cp $1 $2\necho '_even more modified' >> $2") + write_and_commit_file(project.repository, script, "cp $1 $2\necho '_even more modified' >> $2") result = runner.invoke(cli, ["status"]) assert 1 == result.exit_code @@ -502,40 +500,40 @@ def test_update_with_execute(runner, repository, renku_cli, provider): result = runner.invoke(cli, ["status"]) assert 0 == result.exit_code - assert "content_a_even more modified\n" == (repository.path / output1).read_text() - assert "content_b_even more modified\n" == (repository.path / output2).read_text() + assert "content_a_even more modified\n" == (project.path / output1).read_text() + assert "content_b_even more modified\n" == (project.path / output2).read_text() -def test_update_with_external_files(runner, repository, directory_tree): +def test_update_with_external_files(runner, project, directory_tree): """Test update commands that use external files.""" assert 0 == runner.invoke(cli, ["dataset", "add", "-c", "--external", "my-dataset", directory_tree]).exit_code - path = repository.path / "data" / "my-dataset" / "directory_tree" / "file1" + path = project.path / "data" / "my-dataset" / "directory_tree" / "file1" assert 0 == runner.invoke(cli, ["run", "tail", path], stdout="output").exit_code (directory_tree / "file1").write_text("updated file1") assert 0 == runner.invoke(cli, ["dataset", "update", "--all", "--no-external"]).exit_code - assert "updated file1" not in (repository.path / "output").read_text() + assert "updated file1" not in (project.path / "output").read_text() assert 0 == runner.invoke(cli, ["dataset", "update", "--all"]).exit_code result = runner.invoke(cli, ["update", "--all"]) assert 0 == result.exit_code, format_result_exception(result) - assert "updated file1" in (repository.path / "output").read_text() + assert "updated file1" in (project.path / "output").read_text() -def test_update_ignore_deleted_files(runner, repository): +def test_update_ignore_deleted_files(runner, project): """Test update can ignore deleted files.""" - deleted = repository.path / "deleted" - write_and_commit_file(repository, "source", "source content") + deleted = project.path / "deleted" + write_and_commit_file(project.repository, "source", "source content") assert 0 == runner.invoke(cli, ["run", "--name", "run-1", "head", "source"], stdout="upstream").exit_code assert 0 == runner.invoke(cli, ["run", "--name", "run-2", "tail", "upstream"], stdout=deleted).exit_code - write_and_commit_file(repository, "source", "changes") - delete_and_commit_file(repository, deleted) + write_and_commit_file(project.repository, "source", "changes") + 
delete_and_commit_file(project.repository, deleted) result = runner.invoke(cli, ["update", "--dry-run", "--all", "--ignore-deleted"]) @@ -549,18 +547,18 @@ assert not deleted.exists() -def test_update_ignore_deleted_files_config(runner, repository): +def test_update_ignore_deleted_files_config(runner, project): """Test update can ignore deleted files when proper config is set.""" - write_and_commit_file(repository, "source", "source content") + write_and_commit_file(project.repository, "source", "source content") assert 0 == runner.invoke(cli, ["run", "--name", "run-1", "head", "source"], stdout="upstream").exit_code assert 0 == runner.invoke(cli, ["run", "--name", "run-2", "tail", "upstream"], stdout="deleted").exit_code - write_and_commit_file(repository, "source", "changes") - delete_and_commit_file(repository, "deleted") + write_and_commit_file(project.repository, "source", "changes") + delete_and_commit_file(project.repository, "deleted") # Set config to ignore deleted files set_value("renku", "update_ignore_delete", "True") - repository.add(all=True) - repository.commit("Set config") + project.repository.add(all=True) + project.repository.commit("Set config") result = runner.invoke(cli, ["update", "--all", "--dry-run", "--ignore-deleted"]) @@ -569,14 +567,14 @@ assert "run-2" not in result.output -def test_update_deleted_files_reported_with_siblings(runner, repository): +def test_update_deleted_files_reported_with_siblings(runner, project): """Test update regenerates deleted files if they have existing siblings.""" - deleted = repository.path / "deleted" - write_and_commit_file(repository, "source", "source content") + deleted = project.path / "deleted" + write_and_commit_file(project.repository, "source", "source content") assert 0 == runner.invoke(cli, ["run", "--input", "source", "touch", deleted, "sibling"]).exit_code - write_and_commit_file(repository, "source", "changes") - delete_and_commit_file(repository, deleted) + write_and_commit_file(project.repository, "source", "changes") + delete_and_commit_file(project.repository, deleted) result = runner.invoke(cli, ["update", "--all", "--ignore-deleted"]) @@ -584,15 +582,15 @@ assert deleted.exists() -def test_update_deleted_files_reported_with_downstream(runner, repository): +def test_update_deleted_files_reported_with_downstream(runner, project): """Test update reports deleted files if they have existing downstreams.""" - deleted = repository.path / "deleted" - write_and_commit_file(repository, "source", "source content") + deleted = project.path / "deleted" + write_and_commit_file(project.repository, "source", "source content") assert 0 == runner.invoke(cli, ["run", "head", "source"], stdout=deleted).exit_code assert 0 == runner.invoke(cli, ["run", "tail", deleted], stdout="downstream").exit_code - write_and_commit_file(repository, "source", "changes") - delete_and_commit_file(repository, deleted) + write_and_commit_file(project.repository, "source", "changes") + delete_and_commit_file(project.repository, deleted) result = runner.invoke(cli, ["update", "--all", "--ignore-deleted"]) diff --git a/tests/cli/test_workflow.py b/tests/cli/test_workflow.py index 59b5dafea3..b17c9e8520 100644 --- a/tests/cli/test_workflow.py +++ b/tests/cli/test_workflow.py @@ -32,10 +32,9 @@ import pytest from cwl_utils.parser import cwl_v1_2 as cwlgen -from
renku.core.git import commit from renku.core.plugin.provider import available_workflow_providers +from renku.core.util.git import with_commit from renku.core.util.yaml import write_yaml -from renku.domain_model.project_context import project_context from renku.infrastructure.database import Database from renku.infrastructure.gateway.activity_gateway import ActivityGateway from renku.infrastructure.gateway.plan_gateway import PlanGateway @@ -54,7 +53,7 @@ def _execute(capsys, runner, args): assert e.code in {None, 0} -def test_workflow_list(runner, project, run_shell, client): +def test_workflow_list(runner, project, run_shell): """Test listing of workflows.""" # Run a shell command with pipe. output = run_shell('renku run --name run1 --description desc1 -- echo "a" > output1') @@ -145,7 +144,7 @@ def test_workflow_compose(runner, project, run_shell): assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) composite_plan = database["plans-by-name"]["composite_workflow"] @@ -166,7 +165,7 @@ def test_workflow_compose(runner, project, run_shell): assert 0 == result.exit_code, format_result_exception(result) -def test_workflow_compose_from_paths(runner, project, run_shell, client): +def test_workflow_compose_from_paths(runner, project, run_shell): """Test renku workflow compose with input/output paths.""" # Run a shell command with pipe. output = run_shell('renku run --name run1 -- echo "a" > output1') @@ -205,7 +204,7 @@ def test_workflow_compose_from_paths(runner, project, run_shell, client): assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) composite_plan = database["plans-by-name"]["composite_workflow1"] @@ -234,7 +233,7 @@ def test_workflow_compose_from_paths(runner, project, run_shell, client): assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) composite_plan = database["plans-by-name"]["composite_workflow2"] @@ -261,7 +260,7 @@ def test_workflow_compose_from_paths(runner, project, run_shell, client): assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) composite_plan = database["plans-by-name"]["composite_workflow3"] @@ -276,7 +275,7 @@ def test_workflow_compose_from_paths(runner, project, run_shell, client): assert composite_plan.mappings[4].default_value == "output3" -def test_workflow_show(runner, project, run_shell, client): +def test_workflow_show(runner, project, run_shell): """Test renku workflow show.""" # Run a shell command with pipe. 
output = run_shell('renku run --name run1 --description "my workflow" --success-code 0 -- echo "a" > output1') @@ -412,7 +411,7 @@ def test_workflow_export_command(runner, project): assert len(workflow.outputs) == 1 -def test_workflow_edit(runner, client): +def test_workflow_edit(runner, project): """Test naming of CWL tools and workflows.""" def _get_plan_id(output): @@ -422,7 +421,7 @@ def _get_plan_id(output): result = runner.invoke(cli, ["run", "--name", workflow_name, "touch", "data.txt"]) assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) test_plan = database["plans-by-name"][workflow_name] cmd = ["workflow", "edit", workflow_name, "--name", "first"] @@ -430,7 +429,7 @@ def _get_plan_id(output): result = runner.invoke(cli, cmd) assert 0 == result.exit_code, format_result_exception(result) workflow_name = "first" - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) first_plan = database["plans-by-name"]["first"] assert first_plan @@ -441,7 +440,7 @@ def _get_plan_id(output): result = runner.invoke(cli, cmd) assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) first_plan = database["plans"][_get_plan_id(result.stdout)] assert first_plan.description == "Test workflow" @@ -455,7 +454,7 @@ def _get_plan_id(output): assert 0 == result.exit_code, format_result_exception(result) edited_plan_id = _get_plan_id(result.output) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) renamed_param_plan = database["plans"][_get_plan_id(result.output)] assert len(renamed_param_plan.parameters) > 0 @@ -463,7 +462,7 @@ def _get_plan_id(output): result = runner.invoke(cli, cmd) assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) renamed_param_plan = database["plans"][_get_plan_id(result.output)] parameter_names = list(map(lambda x: x.name, renamed_param_plan.parameters)) assert len(parameter_names) > 0 @@ -480,7 +479,7 @@ def _get_plan_id(output): result = runner.invoke(cli, cmd) assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) renamed_param_plan = database["plans"][_get_plan_id(result.output)] assert "Test parameter" == renamed_param_plan.parameters[0].description @@ -523,7 +522,7 @@ def _get_plan_id(output): result = runner.invoke(cli, cmd) assert 0 == result.exit_code, format_result_exception(result) - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) edited_composite_plan = database["plans"][_get_plan_id(result.output)] assert len(edited_composite_plan.mappings) == 1 assert edited_composite_plan.mappings[0].mapped_parameters[0].name == "param1" @@ -532,7 +531,7 @@ def _get_plan_id(output): assert 0 == result.exit_code, format_result_exception(result) -def test_workflow_edit_no_change(runner, client, run_shell): +def test_workflow_edit_no_change(runner, project, run_shell): """Ensure that workflow edit doesn't commit if there are no changes.""" workflow_name = "my-workflow" @@ -540,21 +539,21 @@ def test_workflow_edit_no_change(runner,
client, run_shell): result = runner.invoke(cli, ["run", "--name", workflow_name, "touch", "data.txt"]) assert 0 == result.exit_code, format_result_exception(result) - before = project_context.repository.head.commit + before = project.repository.head.commit result = runner.invoke(cli, ["workflow", "edit", workflow_name]) assert 0 == result.exit_code, format_result_exception(result) - assert before == project_context.repository.head.commit + assert before == project.repository.head.commit -def test_workflow_show_outputs_with_directory(runner, client, run): +def test_workflow_show_outputs_with_directory(runner, project, run): """Output files in directory are not shown as separate outputs.""" base_sh = ["bash", "-c", 'DIR="$0"; mkdir -p "$DIR"; ' 'for x in "$@"; do touch "$DIR/$x"; done'] assert 0 == run(args=["run"] + base_sh + ["output", "foo", "bar"]) - assert (project_context.path / "output" / "foo").exists() - assert (project_context.path / "output" / "bar").exists() + assert (project.path / "output" / "foo").exists() + assert (project.path / "output" / "bar").exists() cmd = ["workflow", "outputs"] result = runner.invoke(cli, cmd) @@ -599,17 +598,7 @@ def test_workflow_show_outputs_with_directory(runner, client, run): ], ) def test_workflow_execute_command( - runner, - run_shell, - project, - capsys, - client, - client_database_injection_manager, - provider, - yaml, - skip_metadata_update, - workflows, - parameters, + runner, run_shell, project, capsys, with_injection, provider, yaml, skip_metadata_update, workflows, parameters ): """Test workflow execute.""" @@ -635,8 +624,8 @@ def test_workflow_execute_command( def _flatten_dict(obj, key_string=""): if type(obj) == dict: key_string = key_string + "." if key_string else key_string - for k in obj: - yield from _flatten_dict(obj[k], key_string + str(k)) + for key in obj: + yield from _flatten_dict(obj[key], key_string + str(key)) else: yield key_string, obj @@ -646,7 +635,7 @@ def _flatten_dict(obj, key_string=""): execute_cmd.append("--skip-metadata-update") _execute(capsys, runner, execute_cmd) else: - database = Database.from_path(project_context.database_path) + database = Database.from_path(project.database_path) plan = database["plans-by-name"][workflow_name] execute_cmd = ["workflow", "execute", "-p", provider] if skip_metadata_update: @@ -693,7 +682,7 @@ def _flatten_dict(obj, key_string=""): assert 0 == result.exit_code, format_result_exception(result) if skip_metadata_update: - with client_database_injection_manager(client): + with with_injection(): plan_gateway = PlanGateway() plans = plan_gateway.get_all_plans() assert len(plans) == len(workflows) + (1 if is_composite else 0) @@ -703,12 +692,12 @@ def _flatten_dict(obj, key_string=""): @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_workflow_execute_command_with_api_parameter_set(runner, run_shell, project, capsys, client, provider): +def test_workflow_execute_command_with_api_parameter_set(runner, run_shell, project, capsys, transaction_id, provider): """Test executing a workflow with --set for a renku.ui.api.Parameter.""" - script = project_context.path / "script.py" - output = project_context.path / "output" + script = project.path / "script.py" + output = project.path / "output" - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): script.write_text("from renku.ui.api import Parameter\n" 'print(Parameter("test", "hello world").value)\n') result = run_shell(f"renku run --name run1 -- python3 {script} 
> {output}") @@ -732,16 +721,16 @@ def test_workflow_execute_command_with_api_parameter_set(runner, run_shell, proj @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_workflow_execute_command_with_api_input_set(runner, run_shell, project, capsys, client, provider): +def test_workflow_execute_command_with_api_input_set(runner, run_shell, project, capsys, transaction_id, provider): """Test executing a workflow with --set for a renku.ui.api.Input.""" - script = project_context.path / "script.py" - output = project_context.path / "output" - input = project_context.path / "input" + script = project.path / "script.py" + output = project.path / "output" + input = project.path / "input" input.write_text("input string") - other_input = project_context.path / "other_input" + other_input = project.path / "other_input" other_input.write_text("my other input string") - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): script.write_text( f"from renku.ui.api import Input\nwith open(Input('my-input', '{input.name}'), 'r') as f:\n" " print(f.read())" @@ -767,13 +756,13 @@ def test_workflow_execute_command_with_api_input_set(runner, run_shell, project, @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_workflow_execute_command_with_api_output_set(runner, run_shell, project, capsys, client, provider): +def test_workflow_execute_command_with_api_output_set(runner, run_shell, project, capsys, transaction_id, provider): """Test executing a workflow with --set for a renku.ui.api.Output.""" - script = project_context.path / "script.py" - output = project_context.path / "output" - other_output = project_context.path / "other_output" + script = project.path / "script.py" + output = project.path / "output" + other_output = project.path / "other_output" - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): script.write_text( f"from renku.ui.api import Output\nwith open(Output('my-output', '{output.name}'), 'w') as f:\n" " f.write('test')" @@ -798,13 +787,13 @@ def test_workflow_execute_command_with_api_output_set(runner, run_shell, project assert 0 == result.exit_code, format_result_exception(result) -def test_workflow_execute_command_with_api_duplicate_output(runner, run_shell, project, capsys, client): +def test_workflow_execute_command_with_api_duplicate_output(runner, run_shell, project, capsys, transaction_id): """Test executing a workflow with duplicate output with differing path.""" - script = project_context.path / "script.py" - output = project_context.path / "output" - other_output = project_context.path / "other_output" + script = project.path / "script.py" + output = project.path / "output" + other_output = project.path / "other_output" - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): script.write_text( f"from renku.ui.api import Output\nopen(Output('my-output', '{output.name}'), 'w')\n" f"open(Output('my-output', '{other_output.name}'), 'w')" @@ -816,12 +805,12 @@ def test_workflow_execute_command_with_api_duplicate_output(runner, run_shell, p assert b"Error: Invalid parameter value - Duplicate input/output name found: my-output\n" in result[0] -def test_workflow_execute_command_with_api_valid_duplicate_output(runner, run_shell, project, capsys, client): +def test_workflow_execute_command_with_api_valid_duplicate_output(runner, run_shell, project, capsys, transaction_id): """Test executing a workflow with 
duplicate output with same path.""" - script = project_context.path / "script.py" - output = project_context.path / "output" + script = project.path / "script.py" + output = project.path / "output" - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): script.write_text( f"from renku.ui.api import Output\nopen(Output('my-output', '{output.name}'), 'w')\n" f"open(Output('my-output', '{output.name}'), 'w')" @@ -836,13 +825,13 @@ def test_workflow_execute_command_with_api_valid_duplicate_output(runner, run_sh assert result[1] is None -def test_workflow_execute_command_with_api_duplicate_input(runner, run_shell, project, capsys, client): +def test_workflow_execute_command_with_api_duplicate_input(runner, run_shell, project, capsys, transaction_id): """Test executing a workflow with duplicate input with differing path.""" - script = project_context.path / "script.py" - input = project_context.path / "input" - other_input = project_context.path / "other_input" + script = project.path / "script.py" + input = project.path / "input" + other_input = project.path / "other_input" - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): script.write_text( f"from renku.ui.api import Input\nopen(Input('my-input', '{input.name}'), 'w')\n" f"open(Input('my-input', '{other_input.name}'), 'w')" @@ -854,12 +843,12 @@ def test_workflow_execute_command_with_api_duplicate_input(runner, run_shell, pr assert b"Error: Invalid parameter value - Duplicate input/output name found: my-input\n" in result[0] -def test_workflow_execute_command_with_api_valid_duplicate_input(runner, run_shell, project, capsys, client): +def test_workflow_execute_command_with_api_valid_duplicate_input(runner, run_shell, project, capsys, transaction_id): """Test executing a workflow with duplicate input with same path.""" - script = project_context.path / "script.py" - input = project_context.path / "input" + script = project.path / "script.py" + input = project.path / "input" - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): script.write_text( f"from renku.ui.api import Input\nopen(Input('my-input', '{input.name}'), 'w')\n" f"open(Input('my-input', '{input.name}'), 'w')" @@ -874,10 +863,10 @@ def test_workflow_execute_command_with_api_valid_duplicate_input(runner, run_she assert result[1] is None -def test_workflow_visualize_non_interactive(runner, project, client, workflow_graph): +def test_workflow_visualize_non_interactive(runner, project, workflow_graph): """Test renku workflow visualize in non-interactive mode.""" - # We don't use pytest paramtrization for performance reasons, so we don't need to build the workflow_graph fixture + # We don't use pytest parametrization for performance reasons, so we don't need to build the workflow_graph fixture # for each execution columns = [[], ["-c", "command"], ["-c", "command,id,date,plan"]] from_command = [ @@ -920,7 +909,7 @@ def test_workflow_visualize_non_interactive(runner, project, client, workflow_gr assert "H" in result.output -def test_workflow_visualize_dot(runner, project, client, workflow_graph): +def test_workflow_visualize_dot(runner, project, workflow_graph): """Test renku workflow visualize dot format.""" result = runner.invoke(cli, ["workflow", "visualize", "--format", "dot", "--revision", "HEAD^", "H", "S"]) @@ -942,7 +931,7 @@ def test_workflow_visualize_dot(runner, project, client, workflow_graph): "Doesn't actually work, not really a tty 
available in github actions, " "see https://github.com/actions/runner/issues/241" ) -def test_workflow_visualize_interactive(runner, project, client, workflow_graph): +def test_workflow_visualize_interactive(runner, project, workflow_graph): """Test renku workflow visualize in interactive mode.""" dimensions = (120, 120) @@ -951,12 +940,12 @@ def test_workflow_visualize_interactive(runner, project, client, workflow_graph) output = [] - def _try_and_show_error(child): + def _try_and_show_error(child_process): # If there was an error, we'd get the 'Aaaaahhh' screen, so get it to print the exception and return the # screen after that. - child.send("\n") - child.expect(pexpect.TIMEOUT, timeout=2) - return _update_screen(child.before) + child_process.send("\n") + child_process.expect(pexpect.TIMEOUT, timeout=2) + return _update_screen(child_process.before) def _update_screen(data): output.append(data) @@ -1132,8 +1121,8 @@ def test_workflow_compose_execute(runner, project, run_shell): def test_workflow_iterate( runner, run_shell, - client, - client_database_injection_manager, + project, + with_injection, workflow, parameters, num_iterations, @@ -1178,7 +1167,7 @@ def test_workflow_iterate( assert b"error" not in output[0] if len(parameters) == 0: - # no effective mapping was suppiled + # no effective mapping was supplied # this should result in an error assert b"Error: Please check the provided mappings" in output[0] return @@ -1188,7 +1177,7 @@ def test_workflow_iterate( assert Path(o).resolve().exists() # check that metadata update was performed or not based on CLI flag - with client_database_injection_manager(client): + with with_injection(): plans = PlanGateway().get_all_plans() activities = ActivityGateway().get_all_activities() assert len(plans) == 1 @@ -1202,12 +1191,12 @@ def test_workflow_iterate( @pytest.mark.parametrize("provider", available_workflow_providers()) -def test_workflow_iterate_command_with_parameter_set(runner, run_shell, project, capsys, client, provider): +def test_workflow_iterate_command_with_parameter_set(runner, run_shell, project, capsys, transaction_id, provider): """Test executing a workflow with --set float value for a renku.ui.api.Parameter.""" - script = project_context.path / "script.py" - output = project_context.path / "output" + script = project.path / "script.py" + output = project.path / "output" - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): script.write_text("import sys\nprint(sys.argv[1])\n") result = run_shell(f"renku run --name run1 -- python {script} 3.98 > {output}") @@ -1242,11 +1231,11 @@ def test_workflow_iterate_command_with_parameter_set(runner, run_shell, project, assert 0 == result.exit_code, format_result_exception(result) -def test_workflow_cycle_detection(run_shell, project, capsys, client): +def test_workflow_cycle_detection(run_shell, project, capsys, transaction_id): """Test creating a cycle is not possible with renku run or workflow execute.""" - input = project_context.path / "input" + input = project.path / "input" - with commit(): + with with_commit(repository=project.repository, transaction_id=transaction_id): input.write_text("test") result = run_shell("renku run --name run1 -- cp input output") @@ -1275,18 +1264,18 @@ def test_workflow_cycle_detection(run_shell, project, capsys, client): @pytest.mark.skipif(sys.platform == "darwin", reason="GitHub macOS image doesn't include Docker") -def test_workflow_execute_docker_toil(runner, client, run_shell, caplog): +def 
test_workflow_execute_docker_toil(runner, project, run_shell, caplog): """Test workflow execute using docker with the toil provider.""" caplog.set_level(logging.INFO) - write_and_commit_file(project_context.repository, "input", "first line\nsecond line") - output = project_context.path / "output" + write_and_commit_file(project.repository, "input", "first line\nsecond line") + output = project.path / "output" run_shell("renku run --name run-1 -- tail -n 1 input > output") assert "first line" not in output.read_text() - write_and_commit_file(project_context.repository, "toil.yaml", "logLevel: INFO\ndocker:\n image: ubuntu") + write_and_commit_file(project.repository, "toil.yaml", "logLevel: INFO\ndocker:\n image: ubuntu") result = runner.invoke(cli, ["workflow", "execute", "-p", "toil", "-s", "n-1=2", "-c", "toil.yaml", "run-1"]) @@ -1295,16 +1284,16 @@ def test_workflow_execute_docker_toil(runner, client, run_shell, caplog): assert "executing with Docker" in caplog.text -def test_workflow_execute_docker_toil_stderr(runner, client, run_shell): +def test_workflow_execute_docker_toil_stderr(runner, project, run_shell): """Test workflow execute using docker with the toil provider and stderr redirection.""" - write_and_commit_file(project_context.repository, "input", "first line\nsecond line") - output = project_context.path / "output" + write_and_commit_file(project.repository, "input", "first line\nsecond line") + output = project.path / "output" run_shell("renku run --name run-1 -- tail -n 1 input 2> output") assert "first line" not in output.read_text() - write_and_commit_file(project_context.repository, "toil.yaml", "docker:\n image: ubuntu") + write_and_commit_file(project.repository, "toil.yaml", "docker:\n image: ubuntu") result = runner.invoke(cli, ["workflow", "execute", "-p", "toil", "-s", "n-1=2", "-c", "toil.yaml", "run-1"]) @@ -1323,7 +1312,7 @@ def test_workflow_execute_docker_toil_stderr(runner, client, run_shell): ) ], ) -def test_workflow_templated_params(runner, run_shell, client, capsys, workflow, parameters, provider, outputs): +def test_workflow_templated_params(runner, run_shell, project, capsys, workflow, parameters, provider, outputs): """Test executing a workflow with templated parameters.""" workflow_name = "foobar" @@ -1342,17 +1331,17 @@ def test_workflow_templated_params(runner, run_shell, client, capsys, workflow, assert Path(o).resolve().exists() -def test_revert_activity(client, runner, client_database_injection_manager): +def test_revert_activity(runner, project, with_injection): """Test reverting activities.""" - input = project_context.path / "input" - intermediate = project_context.path / "intermediate" - output = project_context.path / "output" + input = project.path / "input" + intermediate = project.path / "intermediate" + output = project.path / "output" assert 0 == runner.invoke(cli, ["run", "--name", "r1", "--", "echo", "some-data"], stdout=input).exit_code assert 0 == runner.invoke(cli, ["run", "--name", "r2", "--", "head", input], stdout=intermediate).exit_code assert 0 == runner.invoke(cli, ["run", "--name", "r3", "--", "tail", intermediate], stdout=output).exit_code - with client_database_injection_manager(client): + with with_injection(): activity_gateway = ActivityGateway() activity = next(a for a in activity_gateway.get_all_activities() if a.association.plan.name == "r1") @@ -1360,7 +1349,7 @@ def test_revert_activity(client, runner, client_database_injection_manager): assert 0 == result.exit_code, format_result_exception(result) - with 
client_database_injection_manager(client): + with with_injection(): activity_gateway = ActivityGateway() activities = activity_gateway.get_all_activities(include_deleted=True) activity_1, activity_2, activity_3 = sorted(activities, key=lambda a: a.association.plan.name) @@ -1377,7 +1366,7 @@ def test_revert_activity(client, runner, client_database_injection_manager): assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(client): + with with_injection(): activity_gateway = ActivityGateway() activities = activity_gateway.get_all_activities(include_deleted=True) activity_1, activity_2, activity_3 = sorted(activities, key=lambda a: a.association.plan.name) @@ -1390,16 +1379,16 @@ def test_revert_activity(client, runner, client_database_injection_manager): assert set() == activity_gateway.get_downstream_activities(activity_3) -def test_reverted_activity_status(client, runner, client_database_injection_manager): +def test_reverted_activity_status(runner, project, with_injection): """Test that reverted activity doesn't affect status/update/log/etc.""" - input = project_context.path / "input" - write_and_commit_file(project_context.repository, input, "content") - output = project_context.path / "output" + input = project.path / "input" + write_and_commit_file(project.repository, input, "content") + output = project.path / "output" assert 0 == runner.invoke(cli, ["run", "cat", input], stdout=output).exit_code - write_and_commit_file(project_context.repository, input, "changes") + write_and_commit_file(project.repository, input, "changes") - with client_database_injection_manager(client): + with with_injection(): activity_gateway = ActivityGateway() activity_id = activity_gateway.get_all_activities()[0].id diff --git a/tests/core/commands/test_cli.py b/tests/core/commands/test_cli.py index 3a68b8cf22..7da75c787c 100644 --- a/tests/core/commands/test_cli.py +++ b/tests/core/commands/test_cli.py @@ -155,10 +155,10 @@ def test_streams(runner, project, capsys, no_lfs_warning): assert "source.txt" in result.output -def test_streams_cleanup(runner, repository, run): +def test_streams_cleanup(runner, project, run): """Test cleanup of standard streams.""" - source = repository.path / "source.txt" - stdout = repository.path / "result.txt" + source = project.path / "source.txt" + stdout = project.path / "result.txt" with source.open("w") as fp: fp.write("first,second,third") @@ -179,7 +179,7 @@ def test_streams_cleanup(runner, repository, run): with stdout.open("w") as fp: fp.write("1") - repository.add("result.txt") + project.repository.add("result.txt") with stdout.open("r") as fp: assert fp.read() == "1" @@ -410,14 +410,14 @@ def test_status_with_submodules(isolated_runner, monkeypatch, project_init): assert 0 == result.exit_code, format_result_exception(result) -def test_status_consistency(repository, runner): +def test_status_consistency(runner, project): """Test status consistency in subdirectories.""" os.mkdir("somedirectory") with open("somedirectory/woop", "w") as fp: fp.write("woop") - repository.add("somedirectory/woop") - repository.commit("add woop") + project.repository.add("somedirectory/woop") + project.repository.commit("add woop") result = runner.invoke(cli, ["run", "cp", "somedirectory/woop", "somedirectory/meeh"]) assert 0 == result.exit_code, format_result_exception(result) @@ -425,8 +425,8 @@ def test_status_consistency(repository, runner): with open("somedirectory/woop", "w") as fp: fp.write("weep") - repository.add("somedirectory/woop") 
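# A minimal sketch of the `with_injection` pattern used in test_revert_activity above.
# Unlike the removed `client_database_injection_manager(client)`, the new fixture takes
# no client argument and binds the gateways for the current project context. Fixture
# names mirror this diff; the test itself and the plan name are hypothetical.
from renku.infrastructure.gateway.activity_gateway import ActivityGateway
from renku.ui.cli import cli


def test_activity_is_recorded(runner, project, with_injection):
    """Sketch: assert on stored activities through an injected gateway."""
    assert 0 == runner.invoke(cli, ["run", "--name", "r1", "touch", "out"]).exit_code

    # Bindings only exist inside the context manager, so gateways must be created
    # and used within it.
    with with_injection():
        activities = ActivityGateway().get_all_activities()
        assert "r1" == activities[0].association.plan.name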
- repository.commit("fix woop") + project.repository.add("somedirectory/woop") + project.repository.commit("fix woop") base_result = runner.invoke(cli, ["status"]) os.chdir("somedirectory") @@ -478,9 +478,9 @@ def test_unchanged_stdout(runner, project, capsys, no_lfs_warning): @pytest.mark.skip(reason="renku update not implemented with new metadata yet, reenable later") -def test_modified_output(runner, repository, run): +def test_modified_output(runner, project, run): """Test detection of changed file as output.""" - cwd = repository.path + cwd = project.path source = cwd / "source.txt" data = cwd / DATA_DIR / "results" data.mkdir(parents=True) @@ -493,8 +493,8 @@ def update_source(content): with source.open("w") as fp: fp.write(content) - repository.add(all=True) - repository.commit("Updated source.txt") + project.repository.add(all=True) + project.repository.commit("Updated source.txt") update_source("1") @@ -540,35 +540,34 @@ def test_outputs(runner, project): assert siblings == set(result.output.strip().split("\n")) -def test_deleted_input(runner, repository, capsys): +def test_deleted_input(runner, project, capsys): """Test deleted input.""" - cwd = repository.path - input_ = cwd / "input.txt" - with input_.open("w") as f: + input = project.path / "input.txt" + with input.open("w") as f: f.write("first") - repository.add(all=True) - repository.commit("Created input.txt") + project.repository.add(all=True) + project.repository.commit("Created input.txt") - cmd = ["run", "mv", input_.name, "input.mv"] + cmd = ["run", "mv", input.name, "input.mv"] result = runner.invoke(cli, cmd, catch_exceptions=False) assert 0 == result.exit_code, format_result_exception(result) - assert not input_.exists() + assert not input.exists() assert Path("input.mv").exists() @pytest.mark.skip(reason="renku update not implemented with new metadata yet, reenable later") -def test_input_directory(runner, repository, run, no_lfs_warning): +def test_input_directory(runner, project, run, no_lfs_warning): """Test detection of input directory.""" - cwd = repository.path + cwd = project.path output = cwd / "output.txt" inputs = cwd / "inputs" inputs.mkdir(parents=True) gitkeep = inputs / ".gitkeep" gitkeep.touch() - repository.add(all=True) - repository.commit("Empty inputs directory") + project.repository.add(all=True) + project.repository.commit("Empty inputs directory") assert 0 == run(args=("run", "ls", str(inputs)), stdout=output) with output.open("r") as fp: @@ -576,8 +575,8 @@ def test_input_directory(runner, repository, run, no_lfs_warning): (inputs / "first").touch() - repository.add(all=True) - repository.commit("Created inputs") + project.repository.add(all=True) + project.repository.commit("Created inputs") assert 0 == run(args=("update", output.name)) @@ -585,8 +584,8 @@ def test_input_directory(runner, repository, run, no_lfs_warning): assert "first\n" == fp.read() (inputs / "second").touch() - repository.add(all=True) - repository.commit("Added second input") + project.repository.add(all=True) + project.repository.commit("Added second input") assert 0 == run(args=("update", output.name)) with output.open("r") as fp: @@ -610,7 +609,7 @@ def test_config_manager_creation(project, global_only, config_path_attr): @pytest.mark.parametrize("global_only", (False, True)) -def test_config_manager_set_value(client, global_only): +def test_config_manager_set_value(project, global_only): """Check writing to configuration.""" config_filter = ConfigFilter.GLOBAL_ONLY @@ -627,7 +626,7 @@ def 
test_config_manager_set_value(client, global_only): assert "zenodo" not in config.sections() -def test_config_get_value(client): +def test_config_get_value(project): """Check reading from configuration.""" # Value set locally is not visible globally set_value("local", "key", "local-value") diff --git a/tests/core/commands/test_client.py b/tests/core/commands/test_client.py deleted file mode 100644 index b8aa4c8cf5..0000000000 --- a/tests/core/commands/test_client.py +++ /dev/null @@ -1,35 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2017-2022 - Swiss Data Science Center (SDSC) -# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and -# Eidgenössische Technische Hochschule Zürich (ETHZ). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Test Python SDK client.""" - -import pytest - -from renku.domain_model.project_context import project_context - - -@pytest.mark.parametrize( - "paths, ignored", - ( - ([".renku.lock"], [".renku.lock"]), - (["not ignored", "lib/foo", "build/html"], ["lib/foo", "build/html"]), - (["not ignored"], []), - ), -) -def test_ignored_paths(paths, ignored, client): - """Test resolution of ignored paths.""" - assert project_context.repository.get_ignored_paths(*paths) == ignored diff --git a/tests/core/commands/test_dataset.py b/tests/core/commands/test_dataset.py index 4203b78865..5331b40c76 100644 --- a/tests/core/commands/test_dataset.py +++ b/tests/core/commands/test_dataset.py @@ -110,7 +110,7 @@ def test_creator_parse(): Dataset(name="dataset", creators=["name"]) -def test_creators_with_same_email(project_with_injection, load_dataset_with_injection): +def test_creators_with_same_email(project_with_injection): """Test creators with different names and same email address.""" with DatasetContext(name="dataset", create=True) as dataset: dataset.creators = [Person(name="me", email="me@example.com"), Person(name="me2", email="me@example.com")] @@ -159,9 +159,9 @@ def test_list_files_default(project, tmpdir): assert "some-file" in [Path(f.entity.path).name for f in files] -def test_unlink_default(directory_tree, client): +def test_unlink_default(directory_tree, project): """Test unlink default behaviour.""" - with chdir(project_context.path): + with chdir(project.path): create_dataset_command().with_database(write=True).build().execute("dataset") add_to_dataset_command().with_database(write=True).build().execute( dataset_name="dataset", urls=[str(directory_tree / "dir1")] @@ -172,7 +172,7 @@ def test_unlink_default(directory_tree, client): @pytest.mark.xfail -def test_mutate(client): +def test_mutate(project): """Test metadata change after dataset mutation.""" dataset = Dataset( name="my-dataset", @@ -184,14 +184,14 @@ def test_mutate(client): dataset.mutate() - mutator = get_git_user(project_context.repository) + mutator = get_git_user(project.repository) assert_dataset_is_mutated(old=old_dataset, new=dataset, mutator=mutator) @pytest.mark.xfail -def test_mutator_is_added_once(client): +def test_mutator_is_added_once(project): """Test mutator of a 
dataset is added only once to its creators list.""" - mutator = get_git_user(project_context.repository) + mutator = get_git_user(project.repository) dataset = Dataset( name="my-dataset", @@ -246,12 +246,12 @@ def test_uppercase_dataset_name_is_valid(): @pytest.mark.integration -def test_get_dataset_by_tag(with_injections_manager, tmp_path): +def test_get_dataset_by_tag(with_injection, tmp_path): """Test getting datasets by a given tag.""" url = "https://dev.renku.ch/gitlab/renku-python-integration-tests/lego-datasets.git" repository = Repository.clone_from(url=url, path=tmp_path / "repo") - with with_injections_manager(repository), project_context.with_path(repository.path): + with project_context.with_path(repository.path), with_injection(): dataset_gateway = DatasetGateway() parts_dataset = dataset_gateway.get_by_name("parts") diff --git a/tests/core/commands/test_doctor.py b/tests/core/commands/test_doctor.py index 9ecbf5e55f..de18998ec9 100644 --- a/tests/core/commands/test_doctor.py +++ b/tests/core/commands/test_doctor.py @@ -70,7 +70,7 @@ def test_git_hooks_modified(runner, project): assert "Git hooks are outdated or not installed." in result.output -def test_lfs_broken_history(runner, client, tmp_path): +def test_lfs_broken_history(runner, project, tmp_path): """Test lfs migrate info check on a broken history.""" big_file = tmp_path / "big-file.bin" with open(big_file, "w") as file_: @@ -98,10 +98,10 @@ def test_lfs_broken_history(runner, client, tmp_path): assert "Git history contains large files" not in result.output -def test_check_invalid_imported_dataset(runner, project_with_datasets, client_database_injection_manager): +def test_check_invalid_imported_dataset(runner, project_with_datasets, with_injection): """Test checking imported datasets that have both derived_from and same_as set.""" - with client_database_injection_manager(project_with_datasets): - with with_dataset(project_with_datasets, name="dataset-1", commit_database=True) as dataset: + with with_injection(): + with with_dataset(name="dataset-1", commit_database=True) as dataset: # NOTE: Set both same_as and derived_from for a dataset dataset.same_as = Url(url_str="http://example.com") dataset.derived_from = Url(url_id="datasets/non-existing-id") @@ -119,43 +119,43 @@ def test_check_invalid_imported_dataset(runner, project_with_datasets, client_da assert 0 == result.exit_code, format_result_exception(result) -def test_fix_invalid_imported_dataset(runner, project_with_datasets, client_database_injection_manager): +def test_fix_invalid_imported_dataset(runner, project_with_datasets, with_injection): """Test fixing imported datasets that have both derived_from and same_as set.""" - with client_database_injection_manager(project_with_datasets): - with with_dataset(project_with_datasets, name="dataset-1", commit_database=True) as dataset: + with with_injection(): + with with_dataset(name="dataset-1", commit_database=True) as dataset: # NOTE: Set both same_as and derived_from for a dataset dataset.same_as = Url(url_str="http://example.com") dataset.derived_from = Url(url_id="datasets/non-existing-id") - project_with_datasets.add(all=True) - project_with_datasets.commit("modified dataset") + project_with_datasets.repository.add(all=True) + project_with_datasets.repository.commit("modified dataset") - before_commit_sha = project_with_datasets.head.commit.hexsha + before_commit_sha = project_with_datasets.repository.head.commit.hexsha result = runner.invoke(cli, ["doctor", "--fix"]) assert 0 == result.exit_code, 
format_result_exception(result) assert "Fixing dataset 'dataset-1'" in result.output - assert before_commit_sha != project_with_datasets.head.commit.hexsha - assert not project_with_datasets.is_dirty(untracked_files=True) + assert before_commit_sha != project_with_datasets.repository.head.commit.hexsha + assert not project_with_datasets.repository.is_dirty(untracked_files=True) - with client_database_injection_manager(project_with_datasets): - with with_dataset(project_with_datasets, name="dataset-1") as dataset: + with with_injection(): + with with_dataset(name="dataset-1") as dataset: # NOTE: Set both same_as and derived_from for a dataset assert dataset.same_as.value == "http://example.com" assert dataset.derived_from is None -def test_file_outside_datadir(runner, project_with_datasets, client_database_injection_manager): +def test_file_outside_datadir(runner, project_with_datasets, with_injection): - """Test doctor check deal with files outside a datasets datadir.""" + """Test that doctor checks deal with files outside a dataset's datadir.""" - write_and_commit_file(project_with_datasets, "some_file", "content_a") + write_and_commit_file(project_with_datasets.repository, "some_file", "content_a") - with client_database_injection_manager(project_with_datasets): - with with_dataset(project_with_datasets, name="dataset-1", commit_database=True) as dataset: - dataset.add_or_update_files([DatasetFile.from_path(project_with_datasets, "some_file")]) - project_with_datasets.add(all=True) - project_with_datasets.commit("modified dataset") + with with_injection(): + with with_dataset(name="dataset-1", commit_database=True) as dataset: + dataset.add_or_update_files([DatasetFile.from_path("some_file")]) + project_with_datasets.repository.add(all=True) + project_with_datasets.repository.commit("modified dataset") result = runner.invoke(cli, ["doctor"]) assert 1 == result.exit_code, format_result_exception(result) @@ -165,15 +165,15 @@ def test_file_outside_datadir(runner, project_with_datasets, client_database_inj result = runner.invoke(cli, ["doctor", "--fix"]) assert 0 == result.exit_code, format_result_exception(result) - with client_database_injection_manager(project_with_datasets): - with with_dataset(project_with_datasets, name="dataset-1", commit_database=True) as dataset: + with with_injection(): + with with_dataset(name="dataset-1", commit_database=True) as dataset: assert 1 == len(dataset.files) assert dataset.files[0].entity.path.startswith(str(dataset.get_datadir())) -def test_doctor_fix_activity_catalog(runner, client, client_database_injection_manager): +def test_doctor_fix_activity_catalog(runner, project, with_injection): """Test detecting and fixing activity catalogs that were not persisted.""" - with client_database_injection_manager(client): + with with_injection(): upstream = create_dummy_activity(plan="p1", generations=["input"]) activity = create_dummy_activity(plan="p2", usages=["input"], generations=["intermediate"]) downstream = create_dummy_activity(plan="p3", usages=["intermediate"], generations=["output"]) diff --git a/tests/core/commands/test_plan_factory.py b/tests/core/commands/test_plan_factory.py index c1a50ec627..a0b230f2cc 100644 --- a/tests/core/commands/test_plan_factory.py +++ b/tests/core/commands/test_plan_factory.py @@ -22,24 +22,23 @@ import pytest from renku.core.workflow.plan_factory import PlanFactory -from renku.domain_model.project_context import project_context -def test_1st_tool(client, client_database_injection_manager): +def test_1st_tool(project, with_injection): """Check creation of 1st tool example from args.""" - with
client_database_injection_manager(client): + with with_injection(): plan = PlanFactory(("echo", "Hello world!")).to_plan() assert "Hello world!" == plan.parameters[0].default_value -def test_03_input(client, client_database_injection_manager): +def test_03_input(project, with_injection): """Check the essential input parameters.""" - whale = Path(project_context.path) / "whale.txt" + whale = Path(project.path) / "whale.txt" whale.touch() - project_context.repository.add(whale) - project_context.repository.commit("add whale.txt") + project.repository.add(whale) + project.repository.commit("add whale.txt") argv = [ "echo", @@ -49,8 +48,8 @@ def test_03_input(client, client_database_injection_manager): "hello", "--file=whale.txt", ] - with client_database_injection_manager(client): - plan = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path).to_plan() + with with_injection(): + plan = PlanFactory(argv, directory=project.path, working_dir=project.path).to_plan() assert ["-f"] == plan.parameters[0].to_argv() @@ -66,17 +65,17 @@ def test_03_input(client, client_database_injection_manager): assert argv == plan.to_argv() -def test_base_command_detection(client, client_database_injection_manager): +def test_base_command_detection(project, with_injection): """Test base command detection.""" - hello = Path(project_context.path) / "hello.tar" + hello = Path(project.path) / "hello.tar" hello.touch() - project_context.repository.add(hello) - project_context.repository.commit("add hello.tar") + project.repository.add(hello) + project.repository.commit("add hello.tar") argv = ["tar", "xf", "hello.tar"] - with client_database_injection_manager(client): - plan = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path).to_plan() + with with_injection(): + plan = PlanFactory(argv, directory=project.path, working_dir=project.path).to_plan() assert "tar xf" == plan.command assert plan.inputs[0].default_value == "hello.tar" @@ -85,49 +84,49 @@ def test_base_command_detection(client, client_database_injection_manager): assert argv == plan.to_argv() -def test_base_command_as_file_input(client, client_database_injection_manager): +def test_base_command_as_file_input(project, with_injection): """Test base command detection when it is a script file.""" - cwd = Path(project_context.path) + cwd = Path(project.path) script = cwd / "script.py" script.touch() input_file = cwd / "input.csv" input_file.touch() - project_context.repository.add(script, input_file) - project_context.repository.commit("add file") + project.repository.add(script, input_file) + project.repository.commit("add file") argv = ["script.py", "input.csv"] - with client_database_injection_manager(client): - plan = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path).to_plan() + with with_injection(): + plan = PlanFactory(argv, directory=project.path, working_dir=project.path).to_plan() assert not plan.command assert 2 == len(plan.inputs) -def test_short_base_command_detection(client, client_database_injection_manager): +def test_short_base_command_detection(project, with_injection): """Test base command detection without parameters.""" - with client_database_injection_manager(client): + with with_injection(): plan = PlanFactory(("echo", "A")).to_plan() assert "A" == plan.parameters[0].default_value assert ["echo", "A"] == plan.to_argv() -def test_04_output(client, client_database_injection_manager): - """Test describtion of outputs from a command.""" - hello = 
Path(project_context.path) / "hello.tar" +def test_04_output(project, with_injection): + """Test description of outputs from a command.""" + hello = Path(project.path) / "hello.tar" hello.touch() - project_context.repository.add(hello) - project_context.repository.commit("add hello.tar") + project.repository.add(hello) + project.repository.commit("add hello.tar") argv = ["tar", "xf", "hello.tar"] - factory = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path) + factory = PlanFactory(argv, directory=project.path, working_dir=project.path) # simulate run - output = Path(project_context.path) / "hello.txt" + output = Path(project.path) / "hello.txt" output.touch() factory.add_outputs([(output, None)]) @@ -135,73 +134,73 @@ def test_04_output(client, client_database_injection_manager): assert "hello.txt" == parameters[0].default_value - with client_database_injection_manager(client): + with with_injection(): plan = factory.to_plan() assert argv == plan.to_argv() -def test_05_stdout(client, client_database_injection_manager): +def test_05_stdout(project, with_injection): """Test stdout mapping.""" - output = Path(project_context.path) / "output.txt" + output = Path(project.path) / "output.txt" output.touch() - project_context.repository.add(output) - project_context.repository.commit("add output") + project.repository.add(output) + project.repository.commit("add output") argv = ["echo", "Hello world!"] - factory = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path, stdout="output.txt") + factory = PlanFactory(argv, directory=project.path, working_dir=project.path, stdout="output.txt") assert "output.txt" == factory.stdout factory.add_outputs([("output.txt", None)]) assert "stdout" == factory.outputs[0].mapped_to.stream_type assert 2 == factory.outputs[0].position - with client_database_injection_manager(client): + with with_injection(): plan = factory.to_plan() assert ["echo", '"Hello world!"'] == plan.to_argv() -def test_stdout_with_conflicting_arg(client, client_database_injection_manager): +def test_stdout_with_conflicting_arg(project, with_injection): """Test stdout with conflicting argument value.""" - output = Path(project_context.path) / "lalala" + output = Path(project.path) / "lalala" output.touch() - project_context.repository.add(output) - project_context.repository.commit("add lalala") + project.repository.add(output) + project.repository.commit("add lalala") argv = ["echo", "lalala"] - factory = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path, stdout="lalala") + factory = PlanFactory(argv, directory=project.path, working_dir=project.path, stdout="lalala") assert "lalala" == factory.parameters[0].default_value assert "lalala" == factory.stdout - with client_database_injection_manager(client): + with with_injection(): plan = factory.to_plan() assert argv == plan.to_argv() -def test_06_params(client, client_database_injection_manager): +def test_06_params(project, with_injection): """Test referencing input parameters in other fields.""" - hello = Path(project_context.path) / "hello.tar" + hello = Path(project.path) / "hello.tar" hello.touch() - project_context.repository.add(hello) - project_context.repository.commit("add hello.tar") + project.repository.add(hello) + project.repository.commit("add hello.tar") argv = ["tar", "xf", "hello.tar", "goodbye.txt"] - factory = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path) + factory = PlanFactory(argv, 
directory=project.path, working_dir=project.path) assert "goodbye.txt" == factory.parameters[0].default_value - with client_database_injection_manager(client): + with with_injection(): plan = factory.to_plan() assert argv == plan.to_argv() -def test_09_array_inputs(client, client_database_injection_manager): +def test_09_array_inputs(project, with_injection): """Test specification of input parameters in arrays.""" argv = [ "echo", @@ -214,8 +213,8 @@ def test_09_array_inputs(client, client_database_injection_manager): "-B=six", "-C=seven,eight,nine", ] - with client_database_injection_manager(client): - plan = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path).to_plan() + with with_injection(): + plan = PlanFactory(argv, directory=project.path, working_dir=project.path).to_plan() assert "seven,eight,nine" == plan.parameters[-1].default_value assert "-C=" == plan.parameters[-1].prefix @@ -224,22 +223,22 @@ def test_09_array_inputs(client, client_database_injection_manager): @pytest.mark.parametrize("argv", [["wc"], ["wc", "-l"]]) -def test_stdin_and_stdout(argv, client, client_database_injection_manager): +def test_stdin_and_stdout(argv, project, with_injection): """Test stdout mapping.""" - input_ = Path(project_context.path) / "input.txt" - input_.touch() - output = Path(project_context.path) / "output.txt" + input = project.path / "input.txt" + input.touch() + output = project.path / "output.txt" output.touch() - error = Path(project_context.path) / "error.txt" + error = project.path / "error.txt" error.touch() - project_context.repository.add(input_, output, error) - project_context.repository.commit("add files") + project.repository.add(input, output, error) + project.repository.commit("add files") factory = PlanFactory( argv, - directory=project_context.path, - working_dir=project_context.path, + directory=project.path, + working_dir=project.path, stdin="input.txt", stdout="output.txt", stderr="error.txt", @@ -253,7 +252,7 @@ def test_stdin_and_stdout(argv, client, client_database_injection_manager): factory.add_outputs([("output.txt", None), ("error.txt", None)]) assert "stdout" == factory.outputs[0].mapped_to.stream_type - with client_database_injection_manager(client): + with with_injection(): plan = factory.to_plan() assert argv == plan.to_argv() @@ -269,9 +268,9 @@ def test_stdin_and_stdout(argv, client, client_database_injection_manager): ) -def test_input_directory(client, client_database_injection_manager): +def test_input_directory(project, with_injection): """Test input directory.""" - cwd = Path(project_context.path) + cwd = Path(project.path) src = cwd / "src" src.mkdir(parents=True) @@ -281,13 +280,13 @@ def test_input_directory(client, client_database_injection_manager): src_tar = cwd / "src.tar" src_tar.touch() - project_context.repository.add(src, src_tar) - project_context.repository.commit("add file and folder") + project.repository.add(src, src_tar) + project.repository.commit("add file and folder") argv = ["tar", "czvf", "src.tar", "src"] - factory = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path) + factory = PlanFactory(argv, directory=project.path, working_dir=project.path) - with client_database_injection_manager(client): + with with_injection(): plan = factory.to_plan() assert argv == plan.to_argv() @@ -298,41 +297,41 @@ def test_input_directory(client, client_database_injection_manager): assert inputs[1].default_value == src.name -@pytest.mark.skip("CWLConverter doesn't yet support new 
metadata, renable once it does") -def test_existing_output_directory(client, runner, project, client_database_injection_manager): +@pytest.mark.skip("CWLConverter doesn't yet support new metadata, re-enable once it does") +def test_existing_output_directory(runner, project, with_injection): """Test creation of InitialWorkDirRequirement for output.""" from renku.core.workflow.converters.cwl import CWLConverter - project_context.path = project_context.path - output = project_context.path / "output" + output = project.path / "output" argv = ["script", "output"] - factory = PlanFactory(argv, directory=project_context.path, working_dir=project_context.path) + factory = PlanFactory(argv, directory=project.path, working_dir=project.path) - with factory.watch(client, no_output=True) as tool: + with factory.watch(no_output=True): # Script creates the directory. output.mkdir(parents=True) - with client_database_injection_manager(client): + with with_injection(): plan = factory.to_plan() - cwl, _ = CWLConverter.convert(plan, project_context.path) + cwl, _ = CWLConverter.convert(plan, project.path) assert 1 == len([r for r in cwl.requirements if hasattr(r, "listing")]) output.mkdir(parents=True, exist_ok=True) - with factory.watch(client) as tool: + with factory.watch() as tool: # The directory already exists. (output / "result.txt").touch() assert 1 == len(tool.outputs) - with client_database_injection_manager(client): + with with_injection(): plan = tool.to_plan() - cwl, _ = CWLConverter.convert(plan, project_context.path) + cwl, _ = CWLConverter.convert(plan, project.path) - reqs = [r for r in cwl.requirements if hasattr(r, "listing")] + requirements = [r for r in cwl.requirements if hasattr(r, "listing")] - assert 1 == len(reqs) - assert output.name == reqs[0].listing[0].entryname + assert 1 == len(requirements) + assert output.name == requirements[0].listing[0].entryname assert 1 == len(tool.outputs) diff --git a/tests/core/commands/test_serialization.py b/tests/core/commands/test_serialization.py index 660bc0d77c..b4126839d6 100644 --- a/tests/core/commands/test_serialization.py +++ b/tests/core/commands/test_serialization.py @@ -24,11 +24,12 @@ from renku.core.migration.models import v9 as old_datasets from renku.core.util.uuid import is_uuid +from tests.utils import get_dataset_with_injection -def test_dataset_deserialization(project_with_datasets, load_dataset_with_injection): +def test_dataset_deserialization(project_with_datasets): """Test Dataset deserialization.""" - dataset = load_dataset_with_injection("dataset-1", project_with_datasets) + dataset = get_dataset_with_injection("dataset-1") dataset_types = { "date_created": [datetime.datetime], @@ -44,14 +45,14 @@ def test_dataset_deserialization(project_with_datasets, load_dataset_with_inject creator_types = {"email": str, "id": str, "name": str, "affiliation": str} - creator = load_dataset_with_injection("dataset-1", project_with_datasets).creators[0] + creator = get_dataset_with_injection("dataset-1").creators[0] for attribute, type_ in creator_types.items(): assert type(getattr(creator, attribute)) is type_ @pytest.mark.xfail -def test_uuid_migration(dataset_metadata, client): +def test_uuid_migration(dataset_metadata, project): """Test migration of id with UUID.""" dataset = old_datasets.Dataset.from_jsonld(dataset_metadata) diff --git a/tests/core/commands/test_status.py b/tests/core/commands/test_status.py index 70ed551320..0c7682a98e 100644 --- a/tests/core/commands/test_status.py +++
b/tests/core/commands/test_status.py @@ -27,12 +27,12 @@ from tests.utils import format_result_exception, write_and_commit_file -def test_status(runner, repository, subdirectory): +def test_status(runner, project, subdirectory): """Test status check.""" - source = os.path.join(repository.path, "source.txt") - output = os.path.join(repository.path, "data", "output.txt") + source = os.path.join(project.path, "source.txt") + output = os.path.join(project.path, "data", "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") result = runner.invoke(cli, ["run", "cp", source, output]) assert 0 == result.exit_code, format_result_exception(result) @@ -42,7 +42,7 @@ def test_status(runner, repository, subdirectory): result.outdated_outputs or result.outdated_activities or result.modified_inputs or result.deleted_inputs ) - write_and_commit_file(repository, source, "new content") + write_and_commit_file(project.repository, source, "new content") result = get_status_command().build().execute().output @@ -53,18 +53,18 @@ def test_status_multiple_steps(runner, repository): +def test_status_multiple_steps(runner, project): """Test status check with multiple steps.""" source = os.path.join(os.getcwd(), "source.txt") intermediate = os.path.join(os.getcwd(), "intermediate.txt") output = os.path.join(os.getcwd(), "data", "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") assert 0 == runner.invoke(cli, ["run", "cp", source, intermediate]).exit_code assert 0 == runner.invoke(cli, ["run", "cp", intermediate, output]).exit_code - write_and_commit_file(repository, source, "new content") + write_and_commit_file(project.repository, source, "new content") result = get_status_command().build().execute().output @@ -76,16 +76,16 @@ def test_workflow_without_outputs(runner, repository): +def test_workflow_without_outputs(runner, project): """Test workflow without outputs.""" source = os.path.join(os.getcwd(), "source.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") result = runner.invoke(cli, ["run", "cat", "--no-output", source]) assert 0 == result.exit_code, format_result_exception(result) - write_and_commit_file(repository, source, "new content") + write_and_commit_file(project.repository, source, "new content") result = get_status_command().build().execute().output @@ -96,21 +96,21 @@ def test_status_with_paths(runner, repository, subdirectory): +def test_status_with_paths(runner, project, subdirectory): - """Test status check with multiple steps.""" + """Test status check with multiple paths.""" - source1 = os.path.join(repository.path, "source1.txt") - output1 = os.path.join(repository.path, "data", "output1.txt") - source2 = os.path.join(repository.path, "source2.txt") - output2 = os.path.join(repository.path, "data", "output2.txt") + source1 = os.path.join(project.path, "source1.txt") + output1 = os.path.join(project.path, "data", "output1.txt") + source2 = os.path.join(project.path, "source2.txt") + output2 = os.path.join(project.path, "data", "output2.txt") - write_and_commit_file(repository, source1, "content") - write_and_commit_file(repository, source2,
"content") + write_and_commit_file(project.repository, source1, "content") + write_and_commit_file(project.repository, source2, "content") assert 0 == runner.invoke(cli, ["run", "cp", source1, output1]).exit_code assert 0 == runner.invoke(cli, ["run", "cp", source2, output2]).exit_code - write_and_commit_file(repository, source1, "new content") - write_and_commit_file(repository, source2, "new content") + write_and_commit_file(project.repository, source1, "new content") + write_and_commit_file(project.repository, source2, "new content") result = get_status_command().build().execute(paths=[source1]).output @@ -154,18 +154,18 @@ def test_status_with_paths(runner, repository, subdirectory): assert 0 == len(result.deleted_inputs) -def test_status_with_path_all_generation(runner, repository): +def test_status_with_path_all_generation(runner, project): """Test that all generations are reported if only one of them is specified.""" - source = os.path.join(repository.path, "source.txt") - output1 = os.path.join(repository.path, "data", "output1.txt") - output2 = os.path.join(repository.path, "data", "output2.txt") + source = os.path.join(project.path, "source.txt") + output1 = os.path.join(project.path, "data", "output1.txt") + output2 = os.path.join(project.path, "data", "output2.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") result = runner.invoke(cli, ["run", "--input", source, "touch", output1, output2]) assert 0 == result.exit_code, format_result_exception(result) - write_and_commit_file(repository, source, "new content") + write_and_commit_file(project.repository, source, "new content") result = get_status_command().build().execute(paths=[output1]).output @@ -177,7 +177,7 @@ def test_status_with_path_all_generation(runner, repository): assert 0 == len(result.deleted_inputs) -def test_status_works_in_dirty_repository(runner, client): +def test_status_works_in_dirty_repository(runner, project): """Test status doesn't need a clean project and doesn't change anything.""" source = project_context.path / "source" write_and_commit_file(project_context.repository, source, "source content") @@ -218,7 +218,7 @@ def test_status_ignore_deleted_files(runner, project): assert 0 == len(result.deleted_inputs) -def test_status_ignore_deleted_files_config(runner, client): +def test_status_ignore_deleted_files_config(runner, project): """Test status can ignore deleted files when proper config is set.""" write_and_commit_file(project_context.repository, "source", "source content") assert 0 == runner.invoke(cli, ["run", "head", "source"], stdout="upstream").exit_code @@ -238,7 +238,7 @@ def test_status_ignore_deleted_files_config(runner, client): assert 0 == len(result.deleted_inputs) -def test_status_deleted_files_reported_with_siblings(runner, client): +def test_status_deleted_files_reported_with_siblings(runner, project): """Test status reports deleted file if they have existing siblings.""" write_and_commit_file(project_context.repository, "source", "source content") assert 0 == runner.invoke(cli, ["run", "--input", "source", "touch", "deleted", "sibling"]).exit_code @@ -256,7 +256,7 @@ def test_status_deleted_files_reported_with_siblings(runner, client): assert 0 == len(result.deleted_inputs) -def test_status_deleted_files_reported_with_downstream(runner, client): +def test_status_deleted_files_reported_with_downstream(runner, project): """Test status reports deleted file if they have existing downstreams.""" 
write_and_commit_file(project_context.repository, "source", "source content") assert 0 == runner.invoke(cli, ["run", "head", "source"], stdout="deleted").exit_code @@ -275,13 +275,13 @@ def test_status_deleted_files_reported_with_downstream(runner, client): assert {"deleted"} == result.deleted_inputs -def test_status_deleted_inputs(runner, repository): +def test_status_deleted_inputs(runner, project): """Test status when an input is deleted.""" source = os.path.join(os.getcwd(), "source.txt") intermediate = os.path.join(os.getcwd(), "intermediate.txt") output = os.path.join(os.getcwd(), "data", "output.txt") - write_and_commit_file(repository, source, "content") + write_and_commit_file(project.repository, source, "content") result = runner.invoke(cli, ["run", "cp", source, intermediate]) assert 0 == result.exit_code, format_result_exception(result) diff --git a/tests/core/commands/test_storage.py b/tests/core/commands/test_storage.py index f80721a00f..8d66a0f81c 100644 --- a/tests/core/commands/test_storage.py +++ b/tests/core/commands/test_storage.py @@ -26,7 +26,7 @@ from tests.utils import format_result_exception -def test_lfs_storage_clean_no_remote(runner, project, client): +def test_lfs_storage_clean_no_remote(runner, project): """Test ``renku storage clean`` command with no remote set.""" with (project_context.path / "tracked").open("w") as fp: fp.write("tracked file") @@ -88,8 +88,8 @@ def test_lfs_storage_unpushed_clean(runner, project_with_remote): with (project_context.path / "tracked").open("w") as fp: fp.write("tracked file") subprocess.call(["git", "lfs", "track", "tracked"]) - project_with_remote.add("*") - project_with_remote.commit("tracked file") + project_with_remote.repository.add("*") + project_with_remote.repository.commit("tracked file") result = runner.invoke(cli, ["storage", "clean", "tracked"], catch_exceptions=False) @@ -97,7 +97,7 @@ def test_lfs_storage_unpushed_clean(runner, project_with_remote): assert "These paths were ignored as they are not pushed" in result.output -def test_lfs_migrate(runner, project, client): +def test_lfs_migrate(runner, project): """Test ``renku storage migrate`` command for large files in git.""" for _file in ["dataset_file", "workflow_file", "regular_file"]: @@ -136,7 +136,7 @@ def test_lfs_migrate(runner, project, client): assert ".renku/metadata/activities" not in changed_files -def test_lfs_migrate_no_changes(runner, project, client): +def test_lfs_migrate_no_changes(runner, project): """Test ``renku storage migrate`` command without broken files.""" for _file in ["dataset_file", "workflow_file", "regular_file"]: @@ -160,7 +160,7 @@ def test_lfs_migrate_no_changes(runner, project, client): assert previous_head == project_context.repository.head.commit.hexsha -def test_lfs_migrate_explicit_path(runner, project, client): +def test_lfs_migrate_explicit_path(runner, project): """Test ``renku storage migrate`` command explicit path.""" for _file in ["dataset_file", "workflow_file", "regular_file"]: diff --git a/tests/core/fixtures/core_database.py b/tests/core/fixtures/core_database.py index 6000a29851..d628249529 100644 --- a/tests/core/fixtures/core_database.py +++ b/tests/core/fixtures/core_database.py @@ -21,16 +21,14 @@ import copy import datetime from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Dict, Iterator, Tuple +from typing import Generator, Iterator, Optional, Tuple import pytest from renku.core import errors -from renku.domain_model.project_context import project_context - -if TYPE_CHECKING: - 
from renku.infrastructure.database import Database
-    from renku.infrastructure.repository import Repository
+from renku.infrastructure.database import Database
+from tests.fixtures.repository import RenkuProject
+from tests.utils import get_test_bindings
 
 
 class DummyStorage:
@@ -79,63 +77,25 @@ def database() -> Iterator[Tuple["Database", DummyStorage]]:
     yield database, storage
 
 
-def get_test_bindings() -> Dict[str, Any]:
-    """Return all possible bindings."""
-    from renku.core.interface.activity_gateway import IActivityGateway
-    from renku.core.interface.database_gateway import IDatabaseGateway
-    from renku.core.interface.dataset_gateway import IDatasetGateway
-    from renku.core.interface.plan_gateway import IPlanGateway
-    from renku.core.interface.project_gateway import IProjectGateway
-    from renku.infrastructure.gateway.activity_gateway import ActivityGateway
-    from renku.infrastructure.gateway.database_gateway import DatabaseGateway
-    from renku.infrastructure.gateway.dataset_gateway import DatasetGateway
-    from renku.infrastructure.gateway.plan_gateway import PlanGateway
-    from renku.infrastructure.gateway.project_gateway import ProjectGateway
-
-    constructor_bindings = {
-        IPlanGateway: lambda: PlanGateway(),
-        IActivityGateway: lambda: ActivityGateway(),
-        IDatabaseGateway: lambda: DatabaseGateway(),
-        IDatasetGateway: lambda: DatasetGateway(),
-        IProjectGateway: lambda: ProjectGateway(),
-    }
-
-    return {"bindings": {}, "constructor_bindings": constructor_bindings}
-
-
-def add_client_binding(bindings: Dict[str, Any]) -> Dict[str, Any]:
-    """Add required client bindings."""
-    from renku.command.command_builder.client_dispatcher import ClientDispatcher
-    from renku.core.interface.client_dispatcher import IClientDispatcher
-    from renku.core.management.client import LocalClient
-
-    client = LocalClient()
-
-    client_dispatcher = ClientDispatcher()
-    client_dispatcher.push_created_client_to_stack(client)
-    bindings["bindings"].update({"LocalClient": client, IClientDispatcher: client_dispatcher})
-
-    return bindings
-
-
 @pytest.fixture
-def with_injections_manager() -> Callable[["Repository"], None]:
+def with_injection():
     """Factory fixture for setting up injections in tests."""
     from renku.command.command_builder.command import inject, remove_injector
+    from renku.domain_model.project_context import project_context
 
     @contextlib.contextmanager
-    def with_injection(bindings, path: Path):
+    def with_injection_helper(bindings, constructor_bindings, path: Path):
         """Context manager to temporarily do injections."""
 
-        def _bind(binder):
-            for key, value in bindings["bindings"].items():
+        def bind(binder):
+            for key, value in bindings.items():
                 binder.bind(key, value)
-            for key, value in bindings["constructor_bindings"].items():
+            for key, value in constructor_bindings.items():
                 binder.bind_to_constructor(key, value)
 
             return binder
 
-        inject.configure(_bind, bind_in_runtime=False)
+        inject.configure(bind, bind_in_runtime=False)
 
         with project_context.with_path(path, save_changes=True):
             try:
@@ -143,19 +103,16 @@ def _bind(binder):
             finally:
                 remove_injector()
 
-    def test_injection_manager_helper(repository: "Repository"):
-        bindings = get_test_bindings()
-        add_client_binding(bindings=bindings)
-        return with_injection(bindings=bindings, path=repository.path)
+    def test_injection_manager_helper(project: Optional["RenkuProject"] = None):
+        path = project.path if project else project_context.path
+        bindings, constructor_bindings = get_test_bindings()
+        return with_injection_helper(bindings=bindings, 
constructor_bindings=constructor_bindings, path=path) return test_injection_manager_helper @pytest.fixture -def project_with_injection(repository, with_injections_manager): +def project_with_injection(project, with_injection) -> Generator[RenkuProject, None, None]: """Fixture for context manager with project and database injection.""" - with with_injections_manager(repository): - yield repository - - -client_database_injection_manager = with_injections_manager + with with_injection(): + yield project diff --git a/tests/core/fixtures/core_datasets.py b/tests/core/fixtures/core_datasets.py index 29f27c88bc..e850026417 100644 --- a/tests/core/fixtures/core_datasets.py +++ b/tests/core/fixtures/core_datasets.py @@ -17,16 +17,13 @@ # limitations under the License. """Renku core fixtures for datasets testing.""" -from contextlib import contextmanager -from typing import TYPE_CHECKING, Generator +from typing import Generator import pytest from renku.core.dataset.dataset import create_dataset from renku.core.dataset.dataset_add import add_to_dataset - -if TYPE_CHECKING: - from renku.infrastructure.repository import Repository +from tests.fixtures.repository import RenkuProject @pytest.fixture @@ -50,48 +47,21 @@ def request_callback(request): @pytest.fixture -def project_with_datasets(repository, directory_tree, with_injections_manager) -> Generator["Repository", None, None]: - """A client with datasets.""" +def project_with_datasets(project, directory_tree, with_injection) -> Generator[RenkuProject, None, None]: + """A project with datasets.""" from renku.domain_model.provenance.agent import Person person_1 = Person.from_string("P1 [IANA]") person_2 = Person.from_string("P2 ") - with with_injections_manager(repository): + with with_injection(): create_dataset(name="dataset-1", keywords=["dataset", "1"], creators=[person_1]) dataset = add_to_dataset("dataset-2", urls=[str(p) for p in directory_tree.glob("*")], create=True, copy=True) dataset.keywords = ["dataset", "2"] dataset.creators = [person_1, person_2] - repository.add(all=True) - repository.commit("add files to datasets") - - yield repository - - -@pytest.fixture -def load_dataset_with_injection(with_injections_manager): - """Load dataset method with injection setup.""" - - def _inner(name, client): - from tests.utils import load_dataset - - with with_injections_manager(client): - return load_dataset(name) - - return _inner - - -@pytest.fixture -def get_datasets_provenance_with_injection(client_database_injection_manager): - """Get dataset provenance method with injection setup.""" - - @contextmanager - def _inner(client): - from renku.core.dataset.datasets_provenance import DatasetsProvenance - - with client_database_injection_manager(client): - yield DatasetsProvenance() + project.repository.add(all=True) + project.repository.commit("add files to datasets") - return _inner + yield project diff --git a/tests/core/fixtures/core_workflow.py b/tests/core/fixtures/core_workflow.py index 919d5c7dc4..88e1382a98 100644 --- a/tests/core/fixtures/core_workflow.py +++ b/tests/core/fixtures/core_workflow.py @@ -18,14 +18,14 @@ """Renku core fixtures for workflow testing.""" from datetime import datetime, timedelta +from typing import Generator import pytest -from renku.domain_model.provenance.activity import Activity from renku.domain_model.workflow.composite_plan import CompositePlan from renku.domain_model.workflow.parameter import CommandInput, CommandOutput, CommandParameter from renku.domain_model.workflow.plan import Plan -from 
renku.infrastructure.gateway.activity_gateway import ActivityGateway +from tests.fixtures.repository import RenkuProject from tests.utils import create_dummy_plan @@ -93,13 +93,16 @@ def create_run(name: str) -> Plan: @pytest.fixture -def project_with_runs(repository, with_injections_manager): - """A client with runs.""" +def project_with_runs(project, with_injection) -> Generator[RenkuProject, None, None]: + """A project with runs.""" + from renku.domain_model.provenance.activity import Activity + from renku.infrastructure.gateway.activity_gateway import ActivityGateway def create_activity(plan, date, index) -> Activity: """Create an activity with id /activities/index.""" return Activity.from_plan( plan=plan, + repository=project.repository, id=Activity.generate_id(str(index)), started_at_time=date, ended_at_time=date + timedelta(seconds=1), @@ -133,7 +136,7 @@ def create_activity(plan, date, index) -> Activity: parameters=[("int-parameter", 43, "-n "), ("str-parameter", "some value", None)], ) - with with_injections_manager(repository): + with with_injection(): activity_1 = create_activity(plan_1, date_1, index=1) activity_2 = create_activity(plan_2, date_2, index=2) @@ -142,7 +145,7 @@ def create_activity(plan, date, index) -> Activity: activity_gateway.add(activity_1) activity_gateway.add(activity_2) - repository.add(all=True) - repository.commit("Add runs") + project.repository.add(all=True) + project.repository.commit("Add runs") - yield repository + yield project diff --git a/tests/core/management/test_repository.py b/tests/core/management/test_repository.py index 49f901c9c7..c2902b39d1 100644 --- a/tests/core/management/test_repository.py +++ b/tests/core/management/test_repository.py @@ -25,17 +25,17 @@ from renku.domain_model.project_context import project_context -def test_latest_version(project, with_injections_manager): +def test_latest_version(project, with_injection): """Test returning the latest version of `SoftwareAgent`.""" from renku import __version__ create_dataset_command().build().execute("ds1", title="", description="", creators=[]) - with project_context.with_path(project.path), with_injections_manager(project): + with project_context.with_path(project.path), with_injection(): assert __version__ == project_context.latest_agent -def test_latest_version_user_commits(repository, with_injections_manager): +def test_latest_version_user_commits(project, with_injection): """Test retrieval of `SoftwareAgent` with latest non-renku command.""" from renku import __version__ @@ -44,10 +44,10 @@ def test_latest_version_user_commits(repository, with_injections_manager): file = Path("my-file") file.write_text("123") - repository.add(file) - repository.commit("added my-file") + project.repository.add(file) + project.repository.commit("added my-file") - with project_context.with_path(repository.path), with_injections_manager(repository): + with project_context.with_path(project.path), with_injection(): assert __version__ == project_context.latest_agent diff --git a/tests/core/management/test_storage.py b/tests/core/management/test_storage.py index b168f5eb68..40738bcdb1 100644 --- a/tests/core/management/test_storage.py +++ b/tests/core/management/test_storage.py @@ -26,9 +26,7 @@ @pytest.mark.parametrize("path", [".", "datasets"]) -def test_no_renku_metadata_in_lfs( - project_with_datasets, no_lfs_size_limit, path, subdirectory, with_injections_manager -): +def test_no_renku_metadata_in_lfs(project_with_datasets, no_lfs_size_limit, path, subdirectory, with_injection): """Test 
.renku directory and its content are not included in the LFS."""
     # Explicitly set .renku to not being ignored
     (project_with_datasets.path / ".renkulfsignore").write_text("!.renku")
@@ -40,7 +38,7 @@ def test_no_renku_metadata_in_lfs(
     file2 = path_in_renku_metadata_directory / "file2"
     file2.write_text("123")
 
-    with with_injections_manager(project_with_datasets):
+    with with_injection():
         track_paths_in_storage(file1, file2, path_in_renku_metadata_directory)
 
     attributes = (project_with_datasets.path / ".gitattributes").read_text()
diff --git a/tests/core/management/test_template.py b/tests/core/management/test_template.py
index f054584cb7..fbcc46484b 100644
--- a/tests/core/management/test_template.py
+++ b/tests/core/management/test_template.py
@@ -25,7 +25,7 @@
 from renku.core.template.template import (
     FileAction,
     TemplateAction,
-    copy_template_to_client,
+    copy_template_to_project,
     fetch_templates_source,
     get_file_actions,
 )
@@ -63,11 +63,11 @@ def test_template_fetch_invalid_git_reference():
         fetch_templates_source(source=TEMPLATES_URL, reference="invalid-ref")
 
 
-def test_check_for_template_update(client_with_template, templates_source, client_database_injection_manager):
+def test_check_for_template_update(project_with_template, templates_source, with_injection):
     """Test checking for a template update."""
     templates_source.update(id="dummy", version="2.0.0")
 
-    with client_database_injection_manager(client_with_template):
+    with with_injection():
         project = project_context.project
         updates_available, _, current_version, new_version = check_for_template_update(project)
@@ -76,11 +76,11 @@ def test_check_for_template_update(client_with_template, templates_source, clien
     assert "2.0.0" == new_version
 
 
-def test_template_update_files(client_with_template, templates_source, with_injections_manager):
+def test_template_update_files(project_with_template, templates_source, with_injection):
     """Test template update."""
     templates_source.update(id="dummy", version="2.0.0")
 
-    with with_injections_manager(client_with_template):
+    with with_injection():
         files_before = {p: Path(p).read_text() for p in project_context.project.template_files}
 
         update_template(force=False, interactive=False, dry_run=False)
@@ -89,9 +89,9 @@ def test_template_update_files(client_with_template, templates_source, with_inje
         assert Path(file).read_text() != files_before[file]
 
 
-def test_template_update_source_failure(client_with_template, client_database_injection_manager):
+def test_template_update_source_failure(project_with_template, with_injection):
     """Test template update with broken template source."""
-    with client_database_injection_manager(client_with_template):
+    with with_injection():
         with pytest.raises(errors.TemplateUpdateError):
             update_template(force=False, interactive=False, dry_run=False)
@@ -109,7 +109,7 @@ def test_template_update_source_failure(client_with_template, client_database_in
         (FileAction.KEEP, "project"),
     ],
 )
-def test_copy_template_actions(project, rendered_template, action, content_type, client_database_injection_manager):
+def test_copy_template_actions(project, rendered_template, action, content_type, with_injection):
     """Test FileActions when copying a template."""
     project_content = (project_context.path / "Dockerfile").read_text()
     template_content = (rendered_template.path / "Dockerfile").read_text()
@@ -117,8 +117,8 @@ def test_copy_template_actions(project, rendered_template, action, content_type,
     # NOTE: Ignore all other files except the Dockerfile
     actions = {f: FileAction.IGNORE_UNCHANGED_REMOTE 
for f in rendered_template.get_files()} actions["Dockerfile"] = action - with client_database_injection_manager(project): - copy_template_to_client(rendered_template=rendered_template, project=project_context.project, actions=actions) + with with_injection(): + copy_template_to_project(rendered_template=rendered_template, project=project_context.project, actions=actions) # NOTE: Make sure that files have some content assert project_content @@ -135,9 +135,9 @@ def test_copy_template_actions(project, rendered_template, action, content_type, assert expected_content == (project_context.path / "Dockerfile").read_text() -def test_get_file_actions_for_initialize(client, rendered_template, client_database_injection_manager): +def test_get_file_actions_for_initialize(project, rendered_template, with_injection): """Test getting file action when initializing.""" - with client_database_injection_manager(client): + with with_injection(): actions = get_file_actions( rendered_template=rendered_template, template_action=TemplateAction.INITIALIZE, interactive=False ) @@ -152,9 +152,9 @@ def test_get_file_actions_for_initialize(client, rendered_template, client_datab assert FileAction.KEEP == actions[kept_file] -def test_get_file_actions_for_set(client, rendered_template, client_database_injection_manager): +def test_get_file_actions_for_set(project, rendered_template, with_injection): """Test getting file action when setting a template.""" - with client_database_injection_manager(client): + with with_injection(): actions = get_file_actions( rendered_template=rendered_template, template_action=TemplateAction.SET, interactive=False ) @@ -167,11 +167,9 @@ def test_get_file_actions_for_set(client, rendered_template, client_database_inj assert FileAction.KEEP == actions[kept_file] -def test_get_file_actions_for_update( - client_with_template, rendered_template_with_update, client_database_injection_manager -): +def test_get_file_actions_for_update(project_with_template, rendered_template_with_update, with_injection): """Test getting file action when updating a template.""" - with client_database_injection_manager(client_with_template): + with with_injection(): actions = get_file_actions( rendered_template=rendered_template_with_update, template_action=TemplateAction.UPDATE, interactive=False ) @@ -182,13 +180,11 @@ def test_get_file_actions_for_update( assert FileAction.OVERWRITE == actions[remotely_modified] -def test_update_with_locally_modified_file( - client_with_template, rendered_template_with_update, client_database_injection_manager -): +def test_update_with_locally_modified_file(project_with_template, rendered_template_with_update, with_injection): """Test a locally modified file that is remotely updated won't change.""" (project_context.path / "Dockerfile").write_text("Local modification") - with client_database_injection_manager(client_with_template): + with with_injection(): actions = get_file_actions( rendered_template=rendered_template_with_update, template_action=TemplateAction.UPDATE, interactive=False ) @@ -196,13 +192,11 @@ def test_update_with_locally_modified_file( assert FileAction.KEEP == actions["Dockerfile"] -def test_update_with_locally_deleted_file( - client_with_template, rendered_template_with_update, client_database_injection_manager -): +def test_update_with_locally_deleted_file(project_with_template, rendered_template_with_update, with_injection): """Test a locally deleted file that is remotely updated won't be re-created.""" (project_context.path / "Dockerfile").unlink() - with 
client_database_injection_manager(client_with_template):
+    with with_injection():
         actions = get_file_actions(
             rendered_template=rendered_template_with_update, template_action=TemplateAction.UPDATE, interactive=False
         )
@@ -212,7 +206,7 @@
 @pytest.mark.parametrize("delete", [False, True])
 def test_update_with_locally_changed_immutable_file(
-    client_with_template, rendered_template_with_update, client_database_injection_manager, delete
+    project_with_template, rendered_template_with_update, with_injection, delete
 ):
     """Test that locally changing or deleting an immutable template file aborts the update."""
     if delete:
@@ -222,7 +216,7 @@
     with pytest.raises(
         errors.TemplateUpdateError, match="Can't update template as immutable template file .* has local changes."
-    ), client_database_injection_manager(client_with_template):
+    ), with_injection():
         get_file_actions(
             rendered_template=rendered_template_with_update, template_action=TemplateAction.UPDATE, interactive=False
         )
diff --git a/tests/core/metadata/test_repository.py b/tests/core/metadata/test_repository.py
index 93d8577a94..7466f96b37 100644
--- a/tests/core/metadata/test_repository.py
+++ b/tests/core/metadata/test_repository.py
@@ -315,3 +315,18 @@ def test_get_content_from_lfs(tmp_path):
     repository.copy_content_to_file("non-existing", checksum=valid_checksum, output_path=output_path)
 
     assert "Updated on 01.06.2022" in output_path.read_text()
+
+
+@pytest.mark.parametrize(
+    "paths, ignored",
+    (
+        ([".renku.lock"], [".renku.lock"]),
+        (["not ignored", "lib/foo", "build/html"], ["lib/foo", "build/html"]),
+        (["not ignored"], []),
+    ),
+)
+def test_ignored_paths(paths, ignored, project):
+    """Test resolution of ignored paths."""
+    from renku.domain_model.project_context import project_context
+
+    assert project_context.repository.get_ignored_paths(*paths) == ignored
diff --git a/tests/core/models/test_activity.py b/tests/core/models/test_activity.py
index 8ee5466759..edca7f999d 100644
--- a/tests/core/models/test_activity.py
+++ b/tests/core/models/test_activity.py
@@ -34,7 +34,7 @@ def test_activity_parameter_values(project_with_injection, mocker):
     def get_entity_from_revision_mock(repository, path, revision=None, bypass_cache=False):
         return Entity(checksum="abcdefg", id=uuid4().hex, path=path)
 
-    def get_git_user_mock(client):
+    def get_git_user_mock(repository):
         return Person(id=uuid4().hex, name="John Doe", email="john@doe.com")
 
     mocker.patch("renku.domain_model.provenance.activity.get_entity_from_revision", get_entity_from_revision_mock)
@@ -75,6 +75,7 @@ def get_git_user_mock(client):
     activity = Activity.from_plan(
         plan,
+        repository=project_with_injection.repository,
         project_gateway=project_gateway,
         started_at_time=datetime.utcnow(),
         ended_at_time=datetime.utcnow(),
diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py
index b9fc947f73..8c066afa4c 100644
--- a/tests/core/models/test_shacl_schema.py
+++ b/tests/core/models/test_shacl_schema.py
@@ -30,7 +30,7 @@
 
 @pytest.mark.skip(reason="FIXME correct this when implementing renku graph export")
-def test_dataset_shacl(tmpdir, runner, project, client):
+def test_dataset_shacl(tmpdir, runner, project):
     """Test dataset metadata structure."""
     force_dataset_path = Path(__file__).parent.parent.parent / "data" / "force_dataset_shacl.json"
@@ -68,14 +68,14 @@ def test_dataset_shacl(tmpdir, runner, project, client):
     assert r is True, t
 
 
-def test_project_shacl(project, 
with_injections_manager): +def test_project_shacl(project, with_injection): """Test project metadata structure.""" from renku.command.schema.project import ProjectSchema from renku.domain_model.provenance.agent import Person path = Path(__file__).parent.parent.parent / "data" / "force_project_shacl.json" - with with_injections_manager(project): + with with_injection(): project = project_context.project project.creator = Person(email="johndoe@example.com", name="Johnny Doe") diff --git a/tests/core/plugins/test_session.py b/tests/core/plugins/test_session.py index 9191e8d61e..30faeb1d6d 100644 --- a/tests/core/plugins/test_session.py +++ b/tests/core/plugins/test_session.py @@ -33,7 +33,6 @@ def fake_start( image_name, project_name, config, - client, cpu_request, mem_request, disk_request, @@ -83,13 +82,13 @@ def fake_pre_start_checks(self): ) def test_session_start( run_shell, - client, + project, provider_name, session_provider, provider_patches, parameters, result, - client_database_injection_manager, + with_injection, ): with patch.multiple( session_provider, @@ -104,7 +103,7 @@ def test_session_start( ) assert provider_implementation is not None - with client_database_injection_manager(client): + with with_injection(): if not isinstance(result, str) and issubclass(result, Exception): with pytest.raises(result): session_start(provider=provider_name, config_path=None, **parameters) @@ -129,13 +128,13 @@ def test_session_start( ) def test_session_stop( run_shell, - client, + project, session_provider, provider_name, parameters, provider_patches, result, - client_database_injection_manager, + with_injection, ): with patch.multiple(session_provider, session_stop=fake_stop, **provider_patches): provider_implementation = next( @@ -143,7 +142,7 @@ def test_session_stop( ) assert provider_implementation is not None - with client_database_injection_manager(client): + with with_injection(): if result is not None and issubclass(result, Exception): with pytest.raises(result): session_stop(provider=provider_name, **parameters) @@ -161,16 +160,16 @@ def test_session_stop( @pytest.mark.parametrize("provider_exists,result", [(True, ["0xdeadbeef"]), (False, ParameterError)]) def test_session_list( run_shell, - client, + project, provider_name, session_provider, provider_patches, provider_exists, result, - client_database_injection_manager, + with_injection, ): with patch.multiple(session_provider, session_list=fake_session_list, **provider_patches): - with client_database_injection_manager(client): + with with_injection(): if not isinstance(result, list) and issubclass(result, Exception): with pytest.raises(result): session_list(provider=provider_name if provider_exists else "no_provider", config_path=None) diff --git a/tests/core/test_activity.py b/tests/core/test_activity.py index 3e225a3bbe..86607cce33 100644 --- a/tests/core/test_activity.py +++ b/tests/core/test_activity.py @@ -22,10 +22,11 @@ from renku.core.workflow.activity import revert_activity from renku.infrastructure.gateway.activity_gateway import ActivityGateway from renku.infrastructure.gateway.plan_gateway import PlanGateway +from renku.infrastructure.repository import Repository from tests.utils import create_and_commit_files, create_dummy_activity, create_dummy_plan -def create_dummy_activities(repository): +def create_dummy_activities(repository: Repository): """Create activities for tests in this file.""" # Create files so that they can be found by git create_and_commit_files( @@ -64,7 +65,7 @@ def 
create_dummy_activities(repository):
 
 def test_revert(project_with_injection):
     """Test reverting an activity."""
-    _, activity, _, _ = create_dummy_activities(project_with_injection)
+    _, activity, _, _ = create_dummy_activities(project_with_injection.repository)
 
     revert_activity(activity_id=activity.id, delete_plan=False, force=True, metadata_only=False)
@@ -82,7 +83,7 @@ def test_revert(project_with_injection):
 
 def test_revert_metadata_only(project_with_injection):
     """Test reverting an activity without reverting its generations."""
-    _, activity, _, _ = create_dummy_activities(project_with_injection)
+    _, activity, _, _ = create_dummy_activities(project_with_injection.repository)
 
     revert_activity(activity_id=activity.id, delete_plan=False, force=True, metadata_only=True)
@@ -95,7 +96,7 @@ def test_revert_metadata_only(project_with_injection):
 
 def test_revert_and_delete_plan(project_with_injection):
     """Test reverting an activity and deleting its plan."""
-    _, activity, _, other = create_dummy_activities(project_with_injection)
+    _, activity, _, other = create_dummy_activities(project_with_injection.repository)
     plan_gateway = PlanGateway()
 
     revert_activity(activity_id=activity.id, delete_plan=True, force=True, metadata_only=True)
diff --git a/tests/fixtures/common.py b/tests/fixtures/common.py
index b402f9c1c1..848bb54ec6 100644
--- a/tests/fixtures/common.py
+++ b/tests/fixtures/common.py
@@ -19,7 +19,7 @@
 import os
 from pathlib import Path
-from typing import List
+from typing import Generator, List
 
 import pytest
@@ -78,21 +78,21 @@ def data_repository(directory_tree):
 
 
 @pytest.fixture
-def no_lfs_size_limit(repository):
+def no_lfs_size_limit(project):
     """Configure environment to track all files in LFS independent of size."""
     set_value("renku", "lfs_threshold", "0b")
-    repository.add(".renku/renku.ini")
-    repository.commit("update renku.ini")
+    project.repository.add(".renku/renku.ini")
+    project.repository.commit("update renku.ini")
 
     yield
 
 
 @pytest.fixture
-def no_datadir_commit_warning(repository):
+def no_datadir_commit_warning(project):
     """Configure pre-commit hook to ignore files added to a dataset's data directory."""
     set_value("renku", "check_datadir_files", "false")
-    repository.add(".renku/renku.ini")
-    repository.commit("update renku.ini")
+    project.repository.add(".renku/renku.ini")
+    project.repository.commit("update renku.ini")
 
     yield
@@ -106,3 +106,11 @@ def large_file(tmp_path):
         file.write("some data")
 
     yield path
+
+
+@pytest.fixture
+def transaction_id(project) -> Generator[str, None, None]:
+    """Return current transaction ID."""
+    from renku.domain_model.project_context import project_context
+
+    yield project_context.transaction_id
diff --git a/tests/fixtures/repository.py b/tests/fixtures/repository.py
index a15b09e4c8..80b4b178c4 100644
--- a/tests/fixtures/repository.py
+++ b/tests/fixtures/repository.py
@@ -21,6 +21,7 @@
 import os
 import secrets
 import shutil
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Generator
 
@@ -28,6 +29,7 @@
 from click.testing import CliRunner
 
 from renku.core.config import set_value
+from renku.core.constant import DATABASE_PATH, POINTERS, RENKU_HOME
 from renku.core.util.contexts import chdir
 from renku.domain_model.project_context import ProjectContext, project_context
 from renku.infrastructure.repository import Repository
@@ -35,6 +37,22 @@
 from tests.utils import format_result_exception, modified_environ
 
 
+@dataclass
+class RenkuProject:
+    """A Renku project for use in tests to access project properties."""
+
+ path: Path + repository: Repository + metadata_path: Path = field(init=False) + database_path: Path = field(init=False) + pointers_path: Path = field(init=False) + + def __post_init__(self): + self.metadata_path = self.path / RENKU_HOME + self.database_path = self.path / RENKU_HOME / DATABASE_PATH + self.pointers_path = self.path / RENKU_HOME / POINTERS + + @contextlib.contextmanager def isolated_filesystem(path: Path, name: str = None, delete: bool = True): """Click CliRunner ``isolated_filesystem`` but xdist compatible.""" @@ -77,7 +95,7 @@ def fake_home(tmp_path, monkeypatch) -> Generator[Path, None, None]: @pytest.fixture -def project(fake_home) -> Generator[Repository, None, None]: +def project(fake_home) -> Generator[RenkuProject, None, None]: """A Renku test project.""" project_context.clear() @@ -89,16 +107,4 @@ def project(fake_home) -> Generator[Repository, None, None]: repository = Repository(project_path, search_parent_directories=True) project_context.repository = repository - yield repository - - -@pytest.fixture -def repository(project) -> Generator[Repository, None, None]: - """Return a Renku repository.""" - yield project - - -@pytest.fixture -def client(project) -> Generator[Repository, None, None]: - """Return a Renku repository.""" - yield project + yield RenkuProject(path=repository.path, repository=repository) diff --git a/tests/fixtures/session.py b/tests/fixtures/session.py index b37fd1f2db..0c96274223 100644 --- a/tests/fixtures/session.py +++ b/tests/fixtures/session.py @@ -28,9 +28,8 @@ def dummy_session_provider(): from pathlib import Path from uuid import uuid4 - from renku.core.management.client import LocalClient from renku.core.plugin import hookimpl - from renku.core.plugin import pluginmanager as pluginmanager + from renku.core.plugin import pluginmanager as plugin_manager from renku.domain_model.session import ISessionProvider, Session class _DummySessionProvider(ISessionProvider): @@ -57,7 +56,6 @@ def session_start( image_name: str, project_name: str, config: Optional[Dict[str, Any]], - client: LocalClient, cpu_request: Optional[float] = None, mem_request: Optional[str] = None, disk_request: Optional[str] = None, @@ -82,7 +80,7 @@ def pre_start_checks(self): pass plugin = _DummySessionProvider() - pm = pluginmanager.get_plugin_manager() + pm = plugin_manager.get_plugin_manager() pm.register(plugin) yield diff --git a/tests/fixtures/templates.py b/tests/fixtures/templates.py index afa3714016..06ebd509af 100644 --- a/tests/fixtures/templates.py +++ b/tests/fixtures/templates.py @@ -25,8 +25,8 @@ from packaging.version import Version from renku.domain_model.project_context import project_context -from renku.infrastructure.repository import Repository from renku.version import __version__ as renku_version +from tests.fixtures.repository import RenkuProject @pytest.fixture @@ -250,22 +250,22 @@ def rendered_template(source_template, template_metadata): @pytest.fixture -def client_with_template(repository, rendered_template, with_injections_manager) -> Generator[Repository, None, None]: - """A client with a dummy template.""" - from renku.core.template.template import FileAction, copy_template_to_client +def project_with_template(project, rendered_template, with_injection) -> Generator[RenkuProject, None, None]: + """A project with a dummy template.""" + from renku.core.template.template import FileAction, copy_template_to_project - with with_injections_manager(repository): + with with_injection(): actions = {f: FileAction.OVERWRITE for f in 
rendered_template.get_files()} - project = project_context.project + project_object = project_context.project - copy_template_to_client(rendered_template=rendered_template, project=project, actions=actions) + copy_template_to_project(rendered_template=rendered_template, project=project_object, actions=actions) - project.template_files = [str(project_context.path / f) for f in rendered_template.get_files()] + project_object.template_files = [str(project_context.path / f) for f in rendered_template.get_files()] - repository.add(all=True) - repository.commit("Set a dummy template") + project.repository.add(all=True) + project.repository.commit("Set a dummy template") - yield repository + yield project @pytest.fixture diff --git a/tests/service/fixtures/service_projects.py b/tests/service/fixtures/service_projects.py index 39a830f5cd..ac706722eb 100644 --- a/tests/service/fixtures/service_projects.py +++ b/tests/service/fixtures/service_projects.py @@ -27,10 +27,11 @@ from renku.core.util.os import normalize_to_ascii from renku.infrastructure.repository import Repository +from tests.fixtures.repository import RenkuProject @pytest.fixture -def project_metadata(project) -> Generator[Tuple["Repository", Dict[str, Any]], None, None]: +def project_metadata(project) -> Generator[Tuple["RenkuProject", Dict[str, Any]], None, None]: """Create project with metadata.""" name = project.path.name metadata = { diff --git a/tests/service/jobs/test_datasets.py b/tests/service/jobs/test_datasets.py index 471b9a44eb..c47f10c6db 100644 --- a/tests/service/jobs/test_datasets.py +++ b/tests/service/jobs/test_datasets.py @@ -655,7 +655,7 @@ def test_delay_unlink_dataset_job(svc_client_cache, it_remote_repo_url_temp_bran assert updated_job assert {"unlinked", "remote_branch"} == updated_job.ctrl_result["result"].keys() - assert ["data/data/data1"] == updated_job.ctrl_result["result"]["unlinked"] + assert ["data/data1"] == updated_job.ctrl_result["result"]["unlinked"] @pytest.mark.service @@ -720,4 +720,4 @@ def test_unlink_dataset_sync(svc_client_cache, it_remote_repo_url_temp_branch, v assert updated_job assert {"unlinked", "remote_branch"} == updated_job.ctrl_result["result"].keys() - assert ["data/data/data1"] == updated_job.ctrl_result["result"]["unlinked"] + assert ["data/data1"] == updated_job.ctrl_result["result"]["unlinked"] diff --git a/tests/service/views/test_cache_views.py b/tests/service/views/test_cache_views.py index f8a0cc1467..1343719922 100644 --- a/tests/service/views/test_cache_views.py +++ b/tests/service/views/test_cache_views.py @@ -27,7 +27,7 @@ import pytest from renku.core.dataset.context import DatasetContext -from renku.core.git import commit +from renku.core.util.git import with_commit from renku.domain_model.git import GitURL from renku.domain_model.project import Project from renku.domain_model.project_context import project_context @@ -1023,17 +1023,19 @@ def test_migrating_protected_branch(svc_protected_old_repo): @pytest.mark.integration @pytest.mark.serial @retry_failed -def test_cache_gets_synchronized( - local_remote_repository, directory_tree, quick_cache_synchronization, client_database_injection_manager -): +def test_cache_gets_synchronized(local_remote_repository, directory_tree, quick_cache_synchronization, with_injection): """Test that the cache stays synchronized with the remote repository.""" from renku.domain_model.provenance.agent import Person svc_client, identity_headers, project_id, remote_repo, remote_repo_checkout = local_remote_repository with 
project_context.with_path(remote_repo_checkout.path): - with client_database_injection_manager(remote_repo_checkout): - with commit(commit_message="Create dataset"): + with with_injection(remote_repo_checkout): + with with_commit( + repository=project_context.repository, + transaction_id=project_context.transaction_id, + commit_message="Create dataset", + ): with DatasetContext(name="my_dataset", create=True, commit_database=True) as dataset: dataset.creators = [Person(name="me", email="me@example.com", id="me_id")] @@ -1062,7 +1064,7 @@ def test_cache_gets_synchronized( remote_repo_checkout.pull() - with client_database_injection_manager(remote_repo_checkout): + with with_injection(remote_repo_checkout): datasets = DatasetGateway().get_all_active_datasets() assert 2 == len(datasets) diff --git a/tests/service/views/test_graph_views.py b/tests/service/views/test_graph_views.py index 1949ecb3bb..a1cd81e41f 100644 --- a/tests/service/views/test_graph_views.py +++ b/tests/service/views/test_graph_views.py @@ -50,7 +50,8 @@ def test_graph_export_view(svc_client_cache, it_remote_repo_url): in response.json["result"]["graph"] ) assert "mailto:contact@justsam.io" in response.json["result"]["graph"] - assert len(response.json["result"]["graph"]) > 5000 + assert "invalidatedAtTime" not in response.json["result"]["graph"] + assert len(response.json["result"]["graph"]) > 4500 @pytest.mark.service @@ -93,7 +94,7 @@ def test_graph_export_no_callback(svc_client_cache, it_remote_repo_url): in response.json["result"]["graph"] ) assert "mailto:contact@justsam.io" in response.json["result"]["graph"] - assert len(response.json["result"]["graph"]) > 5000 + assert len(response.json["result"]["graph"]) > 4500 @pytest.mark.service @@ -115,4 +116,4 @@ def test_graph_export_no_revision(svc_client_cache, it_remote_repo_url): in response.json["result"]["graph"] ) assert "mailto:contact@justsam.io" in response.json["result"]["graph"] - assert len(response.json["result"]["graph"]) > 5000 + assert len(response.json["result"]["graph"]) > 4500 diff --git a/tests/service/views/test_templates_views.py b/tests/service/views/test_templates_views.py index d75a73e453..b98f3528ed 100644 --- a/tests/service/views/test_templates_views.py +++ b/tests/service/views/test_templates_views.py @@ -123,7 +123,7 @@ def test_read_manifest_from_wrong_template(svc_client_with_templates, template_u @pytest.mark.service @pytest.mark.integration @retry_failed -def test_create_project_from_template(svc_client_templates_creation, client_database_injection_manager): +def test_create_project_from_template(svc_client_templates_creation, with_injection): """Check creating project from a valid template.""" from renku.ui.service.serializers.headers import RenkuHeaders from renku.ui.service.utils import CACHE_PROJECTS_PATH @@ -155,7 +155,7 @@ def test_create_project_from_template(svc_client_templates_creation, client_data assert reader.get_value("user", "name") == user_data["name"] with project_context.with_path(project_path): - with client_database_injection_manager(project_context.repository): + with with_injection(): project = project_context.project assert project_context.datadir == "my-folder/" diff --git a/tests/service/views/test_version_views.py b/tests/service/views/test_version_views.py index eb3e87b606..c176ddb189 100644 --- a/tests/service/views/test_version_views.py +++ b/tests/service/views/test_version_views.py @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Renku service version view tests.""" -from renku.core.management.migrate import SUPPORTED_PROJECT_VERSION +from renku.core.migration.migrate import SUPPORTED_PROJECT_VERSION from renku.ui.service.views.api_versions import MAXIMUM_VERSION, MINIMUM_VERSION diff --git a/tests/utils.py b/tests/utils.py index b54fbc7dec..c1fe7ce333 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -25,23 +25,21 @@ from datetime import datetime, timedelta from functools import wraps from pathlib import Path -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable, Iterator, List, Optional, Tuple, Type, Union import pytest from flaky import flaky -from renku.command.command_builder.command import inject -from renku.core.dataset.datasets_provenance import DatasetsProvenance +from renku.command.command_builder.command import inject, replace_injection from renku.core.interface.dataset_gateway import IDatasetGateway from renku.domain_model.dataset import Dataset -from renku.domain_model.entity import Entity from renku.domain_model.project_context import project_context -from renku.domain_model.provenance.activity import Activity, Association, Generation, Usage -from renku.domain_model.provenance.agent import Person, SoftwareAgent -from renku.domain_model.provenance.parameter import ParameterValue -from renku.domain_model.workflow.parameter import CommandInput, CommandOutput, CommandParameter, MappedIOStream -from renku.domain_model.workflow.plan import Plan -from renku.infrastructure.repository import Repository + +if TYPE_CHECKING: + from renku.core.dataset.datasets_provenance import DatasetsProvenance + from renku.domain_model.provenance.activity import Activity, Generation, Usage + from renku.domain_model.workflow.plan import Plan + from renku.infrastructure.repository import Repository def raises(error): @@ -79,7 +77,7 @@ def make_dataset_add_payload(project_id, urls, name=None): } -def assert_dataset_is_mutated(old: Dataset, new: Dataset, mutator=None): +def assert_dataset_is_mutated(old: "Dataset", new: "Dataset", mutator=None): """Check metadata is updated correctly after dataset mutation.""" assert old.name == new.name assert old.initial_identifier == new.initial_identifier @@ -147,23 +145,69 @@ def format_result_exception(result): return f"Stack Trace:\n{stacktrace}\n\nOutput:\n{result.output + stderr}" -def load_dataset(name: str) -> Optional[Dataset]: +def load_dataset(name: str) -> Optional["Dataset"]: """Load dataset from disk.""" + from renku.core.dataset.datasets_provenance import DatasetsProvenance + datasets_provenance = DatasetsProvenance() return datasets_provenance.get_by_name(name) +def get_test_bindings() -> Tuple[Dict, Dict[Type, Callable[[], Any]]]: + """Return all possible bindings.""" + from renku.core.interface.activity_gateway import IActivityGateway + from renku.core.interface.database_gateway import IDatabaseGateway + from renku.core.interface.dataset_gateway import IDatasetGateway + from renku.core.interface.plan_gateway import IPlanGateway + from renku.core.interface.project_gateway import IProjectGateway + from renku.infrastructure.gateway.activity_gateway import ActivityGateway + from renku.infrastructure.gateway.database_gateway import DatabaseGateway + from renku.infrastructure.gateway.dataset_gateway import DatasetGateway + from renku.infrastructure.gateway.plan_gateway import PlanGateway + from renku.infrastructure.gateway.project_gateway import ProjectGateway + + 
constructor_bindings = { + IPlanGateway: lambda: PlanGateway(), + IActivityGateway: lambda: ActivityGateway(), + IDatabaseGateway: lambda: DatabaseGateway(), + IDatasetGateway: lambda: DatasetGateway(), + IProjectGateway: lambda: ProjectGateway(), + } + + return {}, constructor_bindings + + +def get_dataset_with_injection(name: str) -> Optional["Dataset"]: + """Load dataset method with injection setup.""" + bindings, constructor_bindings = get_test_bindings() + + with replace_injection(bindings=bindings, constructor_bindings=constructor_bindings): + return load_dataset(name) + + +@contextmanager +def get_datasets_provenance_with_injection() -> Generator["DatasetsProvenance", None, None]: + """Yield an instance of DatasetsProvenance with injection setup.""" + from renku.core.dataset.datasets_provenance import DatasetsProvenance + + bindings, constructor_bindings = get_test_bindings() + + with replace_injection(bindings=bindings, constructor_bindings=constructor_bindings): + yield DatasetsProvenance() + + @contextmanager @inject.autoparams("dataset_gateway") def with_dataset( - client, *, name: str, dataset_gateway: IDatasetGateway, commit_database: bool = False, -) -> Iterator[Optional[Dataset]]: +) -> Iterator[Optional["Dataset"]]: """Yield an editable metadata object for a dataset.""" + from renku.core.dataset.datasets_provenance import DatasetsProvenance + dataset = DatasetsProvenance().get_by_name(name=name, strict=True, immutable=True) if not dataset: @@ -200,7 +244,7 @@ def wrapper(*args, **kwargs): return decorate() if fn else decorate -def write_and_commit_file(repository: Repository, path: Union[Path, str], content: str, commit: bool = True): +def write_and_commit_file(repository: "Repository", path: Union[Path, str], content: str, commit: bool = True): """Write content to a given file and make a commit.""" path = repository.path / path @@ -212,7 +256,7 @@ def write_and_commit_file(repository: Repository, path: Union[Path, str], conten repository.commit(f"Updated '{path.relative_to(repository.path)}'") -def delete_and_commit_file(repository: Repository, path: Union[Path, str]): +def delete_and_commit_file(repository: "Repository", path: Union[Path, str]): """Delete a file and make a commit.""" path = repository.path / path @@ -222,7 +266,7 @@ def delete_and_commit_file(repository: Repository, path: Union[Path, str]): repository.commit(f"Deleted '{path.relative_to(repository.path)}'") -def create_and_commit_files(repository: Repository, *path_and_content: Union[Path, str, Tuple[str, str]]): +def create_and_commit_files(repository: "Repository", *path_and_content: Union[Path, str, Tuple[str, str]]): """Write content to a given file and make a commit.""" for file in path_and_content: if isinstance(file, (Path, str)): @@ -236,16 +280,23 @@ def create_and_commit_files(repository: Repository, *path_and_content: Union[Pat def create_dummy_activity( - plan: Union[Plan, str], + plan: Union["Plan", str], *, ended_at_time=None, - generations: Iterable[Union[Path, str, Generation, Tuple[str, str]]] = (), + generations: Iterable[Union[Path, str, "Generation", Tuple[str, str]]] = (), id: Optional[str] = None, index: Optional[int] = None, parameters: Dict[str, Any] = None, - usages: Iterable[Union[Path, str, Usage, Tuple[str, str]]] = (), -) -> Activity: + usages: Iterable[Union[Path, str, "Usage", Tuple[str, str]]] = (), +) -> "Activity": """Create a dummy activity.""" + from renku.domain_model.entity import Entity + from renku.domain_model.provenance.activity import Activity, Association, 
Generation, Usage + from renku.domain_model.provenance.agent import Person, SoftwareAgent + from renku.domain_model.provenance.parameter import ParameterValue + from renku.domain_model.workflow.plan import Plan + from renku.infrastructure.repository import Repository + assert id is None or index is None, "Cannot set both 'id' and 'index'" if not isinstance(plan, Plan): @@ -319,10 +370,13 @@ def create_dummy_plan( inputs: Iterable[Union[str, Tuple[str, str]]] = (), keywords: List[str] = None, outputs: Iterable[Union[str, Tuple[str, str]]] = (), - parameters: Iterable[Tuple[str, Any, str]] = (), + parameters: Iterable[Tuple[str, Any, Optional[str]]] = (), success_codes: List[int] = None, -) -> Plan: +) -> "Plan": """Create a dummy plan.""" + from renku.domain_model.workflow.parameter import CommandInput, CommandOutput, CommandParameter, MappedIOStream + from renku.domain_model.workflow.plan import Plan + command = command or name id = Plan.generate_id(uuid=None if index is None else str(index)) @@ -385,10 +439,12 @@ def create_dummy_plan( return plan -def clone_compressed_repository(base_path, name) -> Repository: +def clone_compressed_repository(base_path, name) -> "Repository": """Decompress and clone a repository.""" import tarfile + from renku.infrastructure.repository import Repository + compressed_repo_path = Path(__file__).parent / "data" / f"{name}.tar.gz" working_dir = base_path / name @@ -408,4 +464,4 @@ def assert_rpc_response(response, with_key="result"): """Check rpc result in response.""" assert response and 200 == response.status_code - assert with_key in response.json.keys() + assert with_key in response.json.keys(), str(response.json)