From 246ffa4eea54006b5edf9648cdafa3507a30fa48 Mon Sep 17 00:00:00 2001 From: Mohammad Alisafaee Date: Thu, 24 Jun 2021 10:41:05 +0200 Subject: [PATCH] Address review comments --- renku/core/commands/dataset.py | 38 ++++++++++--------- renku/core/incubation/graph.py | 13 ++++--- .../management/command_builder/command.py | 7 +++- .../management/command_builder/database.py | 15 +++++++- renku/core/management/datasets.py | 32 ++++------------ renku/core/models/dataset.py | 10 ++--- tests/fixtures/graph.py | 6 +-- 7 files changed, 64 insertions(+), 57 deletions(-) diff --git a/renku/core/commands/dataset.py b/renku/core/commands/dataset.py index 67fd78e482..9a7d595305 100644 --- a/renku/core/commands/dataset.py +++ b/renku/core/commands/dataset.py @@ -21,6 +21,7 @@ import urllib from collections import OrderedDict from pathlib import Path +from typing import Optional import click import git @@ -37,7 +38,8 @@ from renku.core.management.command_builder import inject from renku.core.management.command_builder.command import Command from renku.core.management.datasets import DATASET_METADATA_PATHS -from renku.core.models.datasets import DatasetDetailsJson, Url, generate_default_name +from renku.core.models.dataset import DatasetsProvenance +from renku.core.models.datasets import DatasetDetailsJson, DatasetTag, Url, generate_default_name from renku.core.models.provenance.agents import Person from renku.core.models.refs import LinkReference from renku.core.models.tabulate import tabulate @@ -72,12 +74,13 @@ def list_datasets(): def create_dataset_helper( name, client: LocalClient, + datasets_provenance: DatasetsProvenance, title=None, description="", creators=None, keywords=None, images=None, - safe_image_paths=[], + safe_image_paths=None, ): """Create a dataset in the repository.""" if not creators: @@ -95,7 +98,7 @@ def create_dataset_helper( safe_image_paths=safe_image_paths, ) - client.update_datasets_provenance(dataset) + datasets_provenance.add_or_update(dataset) return dataset @@ -113,10 +116,11 @@ def _edit_dataset( description, creators, client: LocalClient, + datasets_provenance: DatasetsProvenance, keywords=None, - images=[], + images=None, skip_image_update=False, - safe_image_paths=[], + safe_image_paths=None, ): """Edit dataset metadata.""" creator_objs, no_email_warnings = _construct_creators(creators, ignore_email=True) @@ -149,8 +153,7 @@ def _edit_dataset( return [], no_email_warnings dataset.to_yaml() - - client.update_datasets_provenance(dataset) + datasets_provenance.add_or_update(dataset) return updated, no_email_warnings @@ -212,6 +215,7 @@ def _add_to_dataset( urls, name, client: LocalClient, + datasets_provenance: DatasetsProvenance, external=False, force=False, overwrite=False, @@ -270,7 +274,7 @@ def _add_to_dataset( dataset.update_metadata_from(with_metadata) - client.update_datasets_provenance(dataset) + datasets_provenance.add_or_update(dataset) return dataset except DatasetNotFound: raise DatasetNotFound( @@ -318,7 +322,7 @@ def list_files(): @inject.autoparams() -def _file_unlink(name, include, exclude, client: LocalClient, yes=False): +def _file_unlink(name, include, exclude, client: LocalClient, datasets_provenance: DatasetsProvenance, yes=False): """Remove matching files from a dataset.""" if not include and not exclude: raise ParameterError( @@ -351,7 +355,7 @@ def _file_unlink(name, include, exclude, client: LocalClient, yes=False): dataset.unlink_file(item.path) dataset.to_yaml() - client.update_datasets_provenance(dataset) + 
datasets_provenance.add_or_update(dataset) return records @@ -363,12 +367,12 @@ def file_unlink(): @inject.autoparams() -def _remove_dataset(name, client: LocalClient): +def _remove_dataset(name, client: LocalClient, datasets_provenance: DatasetsProvenance): """Delete a dataset.""" dataset = client.load_dataset(name=name, strict=True) dataset.mutate() dataset.to_yaml() - client.update_datasets_provenance(dataset, remove=True) + datasets_provenance.remove(dataset=dataset, client=client) client.repo.git.add(dataset.path) client.repo.index.commit("renku dataset rm: final mutation") @@ -801,7 +805,7 @@ def _filter(client: LocalClient, names=None, creators=None, include=None, exclud @inject.autoparams() -def _tag_dataset(name, tag, description, client: LocalClient, force=False): +def _tag_dataset(name, tag, description, client: LocalClient, datasets_provenance: DatasetsProvenance, force=False): """Creates a new tag for a dataset.""" dataset = client.load_dataset(name, strict=True) @@ -811,7 +815,7 @@ def _tag_dataset(name, tag, description, client: LocalClient, force=False): raise ParameterError(e) else: dataset.to_yaml() - client.update_datasets_provenance(dataset) + datasets_provenance.add_or_update(dataset) def tag_dataset(): @@ -821,7 +825,7 @@ def tag_dataset(): @inject.autoparams() -def _remove_dataset_tags(name, tags, client: LocalClient): +def _remove_dataset_tags(name, tags, client: LocalClient, datasets_provenance: DatasetsProvenance): """Removes tags from a dataset.""" dataset = client.load_dataset(name, strict=True) @@ -831,7 +835,7 @@ def _remove_dataset_tags(name, tags, client: LocalClient): raise ParameterError(e) else: dataset.to_yaml() - client.update_datasets_provenance(dataset) + datasets_provenance.add_or_update(dataset) def remove_dataset_tags(): @@ -867,7 +871,7 @@ def _prompt_access_token(exporter): return communication.prompt(text_prompt, type=str) -def _prompt_tag_selection(tags): +def _prompt_tag_selection(tags) -> Optional[DatasetTag]: """Prompt user to chose a tag or .""" # Prompt user to select a tag to export tags = sorted(tags, key=lambda t: t.created) diff --git a/renku/core/incubation/graph.py b/renku/core/incubation/graph.py index a7b9a0f0be..92a0b645d6 100644 --- a/renku/core/incubation/graph.py +++ b/renku/core/incubation/graph.py @@ -39,6 +39,7 @@ from renku.core.management.datasets import DATASET_METADATA_PATHS, DatasetsApiMixin from renku.core.management.migrate import migrate from renku.core.management.repository import RepositoryApiMixin +from renku.core.models.dataset import DatasetsProvenance from renku.core.models.entities import Entity from renku.core.models.jsonld import load_yaml from renku.core.models.provenance.activities import Activity @@ -61,8 +62,8 @@ def generate_graph(): """Return a command for generating the graph.""" - command = Command().command(_generate_graph).lock_project() - return command.require_migration().with_database(write=True).with_commit(commit_only=GRAPH_METADATA_PATHS) + command = Command().command(_generate_graph).lock_project().require_migration() + return command.with_database(write=True, create=True).with_commit(commit_only=GRAPH_METADATA_PATHS) @inject.autoparams() @@ -99,9 +100,9 @@ def process_datasets(commit): date = commit.authored_datetime for dataset in datasets: - client.update_datasets_provenance(dataset, revision=revision, date=date, commit_database=False) + datasets_provenance.add_or_update(dataset, revision=revision, date=date) for dataset in deleted_datasets: - client.update_datasets_provenance(dataset, 
revision=revision, date=date, commit_database=False, remove=True) + datasets_provenance.remove(dataset, revision=revision, date=date, client=client) commits = list(client.repo.iter_commits(paths=[f"{client.workflow_path}/*.yaml", ".renku/datasets/*/*.yml"])) n_commits = len(commits) @@ -113,6 +114,7 @@ def process_datasets(commit): raise errors.OperationError("Graph metadata exists. Use ``--force`` to regenerate it.") client.initialize_graph() + datasets_provenance = DatasetsProvenance.from_database(database) for n, commit in enumerate(commits, start=1): communication.echo(f"Processing commits {n}/{n_commits}", end="\r") @@ -415,6 +417,7 @@ def _add_to_dataset( client: LocalClient, urls, name, + datasets_provenance: DatasetsProvenance, external=False, force=False, overwrite=False, @@ -445,7 +448,7 @@ def _add_to_dataset( ref=ref, ) - client.update_datasets_provenance(dataset) + datasets_provenance.add_or_update(dataset) except errors.DatasetNotFound: raise errors.DatasetNotFound( message=f"Dataset `{name}` does not exist.\nUse `renku dataset create {name}` to create the dataset or " diff --git a/renku/core/management/command_builder/command.py b/renku/core/management/command_builder/command.py index d241506767..43a6fdff04 100644 --- a/renku/core/management/command_builder/command.py +++ b/renku/core/management/command_builder/command.py @@ -190,6 +190,7 @@ def _pre_hook(self, builder: "Command", context: dict, *args, **kwargs) -> None: stack = contextlib.ExitStack() context["bindings"] = {LocalClient: client, "LocalClient": client} + context["constructor_bindings"] = {} context["client"] = client context["stack"] = stack context["click_context"] = ctx @@ -225,6 +226,8 @@ def execute(self, *args, **kwargs) -> "CommandResult": def _bind(binder): for key, value in context["bindings"].items(): binder.bind(key, value) + for key, value in context["constructor_bindings"].items(): + binder.bind_to_constructor(key, value) return binder @@ -385,11 +388,11 @@ def with_communicator(self, communicator: CommunicationCallback) -> "Command": return Communicator(self, communicator) @check_finalized - def with_database(self, write: bool = False, path: str = None) -> "Command": + def with_database(self, write: bool = False, path: str = None, create: bool = False) -> "Command": """Provide an object database connection.""" from renku.core.management.command_builder.database import DatabaseCommand - return DatabaseCommand(self, write, path) + return DatabaseCommand(self, write, path, create) class CommandResult: diff --git a/renku/core/management/command_builder/database.py b/renku/core/management/command_builder/database.py index b5b4c7507c..059b2dac40 100644 --- a/renku/core/management/command_builder/database.py +++ b/renku/core/management/command_builder/database.py @@ -20,6 +20,7 @@ from renku.core.incubation.database import Database from renku.core.management.command_builder.command import Command, CommandResult, check_finalized +from renku.core.models.dataset import DatasetsProvenance class DatabaseCommand(Command): @@ -28,10 +29,11 @@ class DatabaseCommand(Command): PRE_ORDER = 4 POST_ORDER = 5 - def __init__(self, builder: Command, write: bool = False, path: str = None) -> None: + def __init__(self, builder: Command, write: bool = False, path: str = None, create: bool = False) -> None: self._builder = builder self._write = write self._path = path + self._create = create def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: """Create a Database singleton.""" @@ -40,10 +42,21 @@ 
def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None: client = context["client"] + # TODO: Remove this block once we switched to use new graph + if not client.has_graph_files() and not self._create: + from unittest.mock import Mock + + self.database = Mock() + context["bindings"][Database] = self.database + context["bindings"][DatasetsProvenance] = Mock() + return + self.database = Database.from_path(path=self._path or client.database_path) context["bindings"][Database] = self.database + context["constructor_bindings"][DatasetsProvenance] = lambda: DatasetsProvenance.from_database(self.database) + def _post_hook(self, builder: Command, context: dict, result: CommandResult, *args, **kwargs) -> None: if self._write and not result.error: self.database.commit() diff --git a/renku/core/management/datasets.py b/renku/core/management/datasets.py index e25dc80892..341156a172 100644 --- a/renku/core/management/datasets.py +++ b/renku/core/management/datasets.py @@ -49,7 +49,7 @@ from renku.core.management.command_builder import inject from renku.core.management.command_builder.command import replace_injected_client from renku.core.management.config import RENKU_HOME -from renku.core.models.dataset import DatasetProvenance +from renku.core.models.dataset import DatasetsProvenance from renku.core.models.datasets import ( Dataset, DatasetFile, @@ -128,24 +128,6 @@ def renku_pointers_path(self): path.mkdir(exist_ok=True) return path - @inject.autoparams() - def update_datasets_provenance( - self, dataset, database: Database, *, remove=False, revision: str = None, date=None, commit_database=True - ): - """Update datasets provenance for a dataset.""" - if not self.has_graph_files(): - return - - datasets_provenance = DatasetProvenance.from_database(database) - - if remove: - datasets_provenance.remove_dataset(dataset=dataset, client=self, revision=revision, date=date) - else: - datasets_provenance.update_dataset(dataset=dataset, client=self, revision=revision, date=date) - - if commit_database: - database.commit() - def datasets_from_commit(self, commit=None): """Return datasets defined in a commit.""" commit = commit or self.repo.head.commit @@ -244,7 +226,7 @@ def with_dataset(self, name=None, create=False, immutable=False): @contextmanager def with_dataset_provenance(self, database: Database, *, name=None, create=False): """Yield a dataset's metadata from dataset provenance.""" - datasets_provenance = DatasetProvenance.from_database(database) + datasets_provenance = DatasetsProvenance.from_database(database) dataset = datasets_provenance.get_by_name(name=name) clean_up_required = False dataset_ref = None @@ -1059,7 +1041,8 @@ def update_dataset_local_files(self, records, delete=False): return updated_files, deleted_files - def _update_datasets_metadata(self, updated_files, deleted_files, delete): + @inject.autoparams() + def _update_datasets_metadata(self, updated_files, deleted_files, delete, datasets_provenance: DatasetsProvenance): modified_datasets = {} for file_ in updated_files: @@ -1076,7 +1059,7 @@ def _update_datasets_metadata(self, updated_files, deleted_files, delete): for dataset in modified_datasets.values(): dataset.to_yaml() - self.update_datasets_provenance(dataset) + datasets_provenance.add_or_update(dataset) def update_dataset_git_files(self, files, ref, delete=False): """Update files and dataset metadata according to their remotes. 
@@ -1201,7 +1184,8 @@ def _calculate_checksum(self, filepath): except GitCommandError: return None - def update_external_files(self, records): + @inject.autoparams() + def update_external_files(self, records, datasets_provenance: DatasetsProvenance): """Update files linked to external storage.""" updated_files_paths = [] updated_datasets = {} @@ -1232,7 +1216,7 @@ def update_external_files(self, records): file_.update_commit(commit) dataset.mutate() dataset.to_yaml() - self.update_datasets_provenance(dataset) + datasets_provenance.add_or_update(dataset) def _update_pointer_file(self, pointer_file_path): """Update a pointer file.""" diff --git a/renku/core/models/dataset.py b/renku/core/models/dataset.py index 51d28efe8b..62f361de0b 100644 --- a/renku/core/models/dataset.py +++ b/renku/core/models/dataset.py @@ -560,7 +560,7 @@ def _convert_to_dataset_files(self, client): return dataset_files -class DatasetProvenance: +class DatasetsProvenance: """A set of datasets.""" def __init__(self, datasets: Index, provenance: Index): @@ -570,12 +570,12 @@ def __init__(self, datasets: Index, provenance: Index): self._provenance: Index = provenance @classmethod - def from_database(cls, database: Database) -> "DatasetProvenance": + def from_database(cls, database: Database) -> "DatasetsProvenance": """Return an instance from a metadata database.""" datasets = database["datasets"] provenance = database["datasets-provenance"] - return DatasetProvenance(datasets=datasets, provenance=provenance) + return DatasetsProvenance(datasets=datasets, provenance=provenance) def get_by_id(self, id: str) -> Optional[Dataset]: """Return a dataset by its name.""" @@ -590,7 +590,7 @@ def get_provenance(self): return self._provenance.values() @inject.params(client="LocalClient") - def update_dataset( + def add_or_update( self, dataset: old_datasets.Dataset, client, @@ -617,7 +617,7 @@ def update_dataset( self._datasets.add(new_dataset) self._provenance.add(new_dataset) - def remove_dataset(self, dataset, client, revision=None, date=None): + def remove(self, dataset, client, revision=None, date=None): """Remove a dataset.""" new_dataset = Dataset.from_dataset(dataset, client, revision) current_dataset = self._datasets.pop(dataset.name, None) diff --git a/tests/fixtures/graph.py b/tests/fixtures/graph.py index 30d9a0fe84..8f4c47e9dd 100644 --- a/tests/fixtures/graph.py +++ b/tests/fixtures/graph.py @@ -31,15 +31,15 @@ def client_with_new_graph(client): @pytest.fixture def datasets_provenance(): - """A function to return DatasetProvenance for a client.""" + """A function to return DatasetsProvenance for a client.""" from renku.core.incubation.database import Database - from renku.core.models.dataset import DatasetProvenance + from renku.core.models.dataset import DatasetsProvenance def get_datasets_provenance(client): """Return dataset provenance if available.""" assert client.has_graph_files() database = Database.from_path(client.database_path) - return DatasetProvenance.from_database(database) + return DatasetsProvenance.from_database(database) return get_datasets_provenance
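
Usage sketch: the diff above removes the `LocalClient.update_datasets_provenance()` helper and instead has the command builder bind a `DatasetsProvenance` object lazily (via the new `constructor_bindings`) whenever `with_database()` is used, so command functions receive it through `@inject.autoparams()`. The snippet below is a minimal, hypothetical illustration of how a dataset command is expected to consume that binding; `_my_dataset_command`, `my_command`, and the exact builder chain are illustrative assumptions rather than part of this patch, and the `LocalClient` import path is assumed from the surrounding codebase.

from renku.core.management import LocalClient  # assumed import path
from renku.core.management.command_builder import inject
from renku.core.management.command_builder.command import Command
from renku.core.management.datasets import DATASET_METADATA_PATHS
from renku.core.models.dataset import DatasetsProvenance


@inject.autoparams()
def _my_dataset_command(name, client: LocalClient, datasets_provenance: DatasetsProvenance):
    """Hypothetical command body: mutate a dataset and record it in the provenance."""
    dataset = client.load_dataset(name=name, strict=True)
    dataset.to_yaml()
    # Replaces the removed ``client.update_datasets_provenance(dataset)`` call.
    datasets_provenance.add_or_update(dataset)
    return dataset


def my_command():
    """Hypothetical builder chain mirroring the ones changed in this patch."""
    command = Command().command(_my_dataset_command).lock_project()
    # ``with_database`` binds ``Database`` and, through the new constructor
    # binding, ``DatasetsProvenance``. Without ``create=True`` both fall back
    # to ``Mock()`` objects when the repository has no graph files yet, so
    # ``add_or_update`` becomes a no-op in that case (see the TODO block added
    # to ``DatabaseCommand._pre_hook``).
    return command.with_database(write=True).with_commit(commit_only=DATASET_METADATA_PATHS)

Note that persisting the database now happens in `DatabaseCommand._post_hook()`: `database.commit()` is only called when `write=True` and the command finished without an error, so command bodies no longer commit the database themselves.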