Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
m-alisafaee committed Jun 24, 2021
1 parent 383dfdd commit 246ffa4
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 57 deletions.
38 changes: 21 additions & 17 deletions renku/core/commands/dataset.py
Expand Up @@ -21,6 +21,7 @@
import urllib
from collections import OrderedDict
from pathlib import Path
from typing import Optional

import click
import git
Expand All @@ -37,7 +38,8 @@
from renku.core.management.command_builder import inject
from renku.core.management.command_builder.command import Command
from renku.core.management.datasets import DATASET_METADATA_PATHS
from renku.core.models.datasets import DatasetDetailsJson, Url, generate_default_name
from renku.core.models.dataset import DatasetsProvenance
from renku.core.models.datasets import DatasetDetailsJson, DatasetTag, Url, generate_default_name
from renku.core.models.provenance.agents import Person
from renku.core.models.refs import LinkReference
from renku.core.models.tabulate import tabulate
Expand Down Expand Up @@ -72,12 +74,13 @@ def list_datasets():
def create_dataset_helper(
name,
client: LocalClient,
datasets_provenance: DatasetsProvenance,
title=None,
description="",
creators=None,
keywords=None,
images=None,
safe_image_paths=[],
safe_image_paths=None,
):
"""Create a dataset in the repository."""
if not creators:
Expand All @@ -95,7 +98,7 @@ def create_dataset_helper(
safe_image_paths=safe_image_paths,
)

client.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)

return dataset

Expand All @@ -113,10 +116,11 @@ def _edit_dataset(
description,
creators,
client: LocalClient,
datasets_provenance: DatasetsProvenance,
keywords=None,
images=[],
images=None,
skip_image_update=False,
safe_image_paths=[],
safe_image_paths=None,
):
"""Edit dataset metadata."""
creator_objs, no_email_warnings = _construct_creators(creators, ignore_email=True)
Expand Down Expand Up @@ -149,8 +153,7 @@ def _edit_dataset(
return [], no_email_warnings

dataset.to_yaml()

client.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)

return updated, no_email_warnings

Expand Down Expand Up @@ -212,6 +215,7 @@ def _add_to_dataset(
urls,
name,
client: LocalClient,
datasets_provenance: DatasetsProvenance,
external=False,
force=False,
overwrite=False,
Expand Down Expand Up @@ -270,7 +274,7 @@ def _add_to_dataset(

dataset.update_metadata_from(with_metadata)

client.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)
return dataset
except DatasetNotFound:
raise DatasetNotFound(
Expand Down Expand Up @@ -318,7 +322,7 @@ def list_files():


@inject.autoparams()
def _file_unlink(name, include, exclude, client: LocalClient, yes=False):
def _file_unlink(name, include, exclude, client: LocalClient, datasets_provenance: DatasetsProvenance, yes=False):
"""Remove matching files from a dataset."""
if not include and not exclude:
raise ParameterError(
Expand Down Expand Up @@ -351,7 +355,7 @@ def _file_unlink(name, include, exclude, client: LocalClient, yes=False):
dataset.unlink_file(item.path)

dataset.to_yaml()
client.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)

return records

Expand All @@ -363,12 +367,12 @@ def file_unlink():


@inject.autoparams()
def _remove_dataset(name, client: LocalClient):
def _remove_dataset(name, client: LocalClient, datasets_provenance: DatasetsProvenance):
"""Delete a dataset."""
dataset = client.load_dataset(name=name, strict=True)
dataset.mutate()
dataset.to_yaml()
client.update_datasets_provenance(dataset, remove=True)
datasets_provenance.remove(dataset=dataset, client=client)

client.repo.git.add(dataset.path)
client.repo.index.commit("renku dataset rm: final mutation")
Expand Down Expand Up @@ -801,7 +805,7 @@ def _filter(client: LocalClient, names=None, creators=None, include=None, exclud


@inject.autoparams()
def _tag_dataset(name, tag, description, client: LocalClient, force=False):
def _tag_dataset(name, tag, description, client: LocalClient, datasets_provenance: DatasetsProvenance, force=False):
"""Creates a new tag for a dataset."""
dataset = client.load_dataset(name, strict=True)

Expand All @@ -811,7 +815,7 @@ def _tag_dataset(name, tag, description, client: LocalClient, force=False):
raise ParameterError(e)
else:
dataset.to_yaml()
client.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)


def tag_dataset():
Expand All @@ -821,7 +825,7 @@ def tag_dataset():


@inject.autoparams()
def _remove_dataset_tags(name, tags, client: LocalClient):
def _remove_dataset_tags(name, tags, client: LocalClient, datasets_provenance: DatasetsProvenance):
"""Removes tags from a dataset."""
dataset = client.load_dataset(name, strict=True)

Expand All @@ -831,7 +835,7 @@ def _remove_dataset_tags(name, tags, client: LocalClient):
raise ParameterError(e)
else:
dataset.to_yaml()
client.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)


def remove_dataset_tags():
Expand Down Expand Up @@ -867,7 +871,7 @@ def _prompt_access_token(exporter):
return communication.prompt(text_prompt, type=str)


def _prompt_tag_selection(tags):
def _prompt_tag_selection(tags) -> Optional[DatasetTag]:
"""Prompt user to chose a tag or <HEAD>."""
# Prompt user to select a tag to export
tags = sorted(tags, key=lambda t: t.created)
Expand Down
13 changes: 8 additions & 5 deletions renku/core/incubation/graph.py
Expand Up @@ -39,6 +39,7 @@
from renku.core.management.datasets import DATASET_METADATA_PATHS, DatasetsApiMixin
from renku.core.management.migrate import migrate
from renku.core.management.repository import RepositoryApiMixin
from renku.core.models.dataset import DatasetsProvenance
from renku.core.models.entities import Entity
from renku.core.models.jsonld import load_yaml
from renku.core.models.provenance.activities import Activity
Expand All @@ -61,8 +62,8 @@

def generate_graph():
"""Return a command for generating the graph."""
command = Command().command(_generate_graph).lock_project()
return command.require_migration().with_database(write=True).with_commit(commit_only=GRAPH_METADATA_PATHS)
command = Command().command(_generate_graph).lock_project().require_migration()
return command.with_database(write=True, create=True).with_commit(commit_only=GRAPH_METADATA_PATHS)


@inject.autoparams()
Expand Down Expand Up @@ -99,9 +100,9 @@ def process_datasets(commit):
date = commit.authored_datetime

for dataset in datasets:
client.update_datasets_provenance(dataset, revision=revision, date=date, commit_database=False)
datasets_provenance.add_or_update(dataset, revision=revision, date=date)
for dataset in deleted_datasets:
client.update_datasets_provenance(dataset, revision=revision, date=date, commit_database=False, remove=True)
datasets_provenance.remove(dataset, revision=revision, date=date, client=client)

commits = list(client.repo.iter_commits(paths=[f"{client.workflow_path}/*.yaml", ".renku/datasets/*/*.yml"]))
n_commits = len(commits)
Expand All @@ -113,6 +114,7 @@ def process_datasets(commit):
raise errors.OperationError("Graph metadata exists. Use ``--force`` to regenerate it.")

client.initialize_graph()
datasets_provenance = DatasetsProvenance.from_database(database)

for n, commit in enumerate(commits, start=1):
communication.echo(f"Processing commits {n}/{n_commits}", end="\r")
Expand Down Expand Up @@ -415,6 +417,7 @@ def _add_to_dataset(
client: LocalClient,
urls,
name,
datasets_provenance: DatasetsProvenance,
external=False,
force=False,
overwrite=False,
Expand Down Expand Up @@ -445,7 +448,7 @@ def _add_to_dataset(
ref=ref,
)

client.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)
except errors.DatasetNotFound:
raise errors.DatasetNotFound(
message=f"Dataset `{name}` does not exist.\nUse `renku dataset create {name}` to create the dataset or "
Expand Down
7 changes: 5 additions & 2 deletions renku/core/management/command_builder/command.py
Expand Up @@ -190,6 +190,7 @@ def _pre_hook(self, builder: "Command", context: dict, *args, **kwargs) -> None:
stack = contextlib.ExitStack()

context["bindings"] = {LocalClient: client, "LocalClient": client}
context["constructor_bindings"] = {}
context["client"] = client
context["stack"] = stack
context["click_context"] = ctx
Expand Down Expand Up @@ -225,6 +226,8 @@ def execute(self, *args, **kwargs) -> "CommandResult":
def _bind(binder):
for key, value in context["bindings"].items():
binder.bind(key, value)
for key, value in context["constructor_bindings"].items():
binder.bind_to_constructor(key, value)

return binder

Expand Down Expand Up @@ -385,11 +388,11 @@ def with_communicator(self, communicator: CommunicationCallback) -> "Command":
return Communicator(self, communicator)

@check_finalized
def with_database(self, write: bool = False, path: str = None) -> "Command":
def with_database(self, write: bool = False, path: str = None, create: bool = False) -> "Command":
"""Provide an object database connection."""
from renku.core.management.command_builder.database import DatabaseCommand

return DatabaseCommand(self, write, path)
return DatabaseCommand(self, write, path, create)


class CommandResult:
Expand Down
15 changes: 14 additions & 1 deletion renku/core/management/command_builder/database.py
Expand Up @@ -20,6 +20,7 @@

from renku.core.incubation.database import Database
from renku.core.management.command_builder.command import Command, CommandResult, check_finalized
from renku.core.models.dataset import DatasetsProvenance


class DatabaseCommand(Command):
Expand All @@ -28,10 +29,11 @@ class DatabaseCommand(Command):
PRE_ORDER = 4
POST_ORDER = 5

def __init__(self, builder: Command, write: bool = False, path: str = None) -> None:
def __init__(self, builder: Command, write: bool = False, path: str = None, create: bool = False) -> None:
self._builder = builder
self._write = write
self._path = path
self._create = create

def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None:
"""Create a Database singleton."""
Expand All @@ -40,10 +42,21 @@ def _pre_hook(self, builder: Command, context: dict, *args, **kwargs) -> None:

client = context["client"]

# TODO: Remove this block once we switched to use new graph
if not client.has_graph_files() and not self._create:
from unittest.mock import Mock

self.database = Mock()
context["bindings"][Database] = self.database
context["bindings"][DatasetsProvenance] = Mock()
return

self.database = Database.from_path(path=self._path or client.database_path)

context["bindings"][Database] = self.database

context["constructor_bindings"][DatasetsProvenance] = lambda: DatasetsProvenance.from_database(self.database)

def _post_hook(self, builder: Command, context: dict, result: CommandResult, *args, **kwargs) -> None:
if self._write and not result.error:
self.database.commit()
Expand Down
32 changes: 8 additions & 24 deletions renku/core/management/datasets.py
Expand Up @@ -49,7 +49,7 @@
from renku.core.management.command_builder import inject
from renku.core.management.command_builder.command import replace_injected_client
from renku.core.management.config import RENKU_HOME
from renku.core.models.dataset import DatasetProvenance
from renku.core.models.dataset import DatasetsProvenance
from renku.core.models.datasets import (
Dataset,
DatasetFile,
Expand Down Expand Up @@ -128,24 +128,6 @@ def renku_pointers_path(self):
path.mkdir(exist_ok=True)
return path

@inject.autoparams()
def update_datasets_provenance(
self, dataset, database: Database, *, remove=False, revision: str = None, date=None, commit_database=True
):
"""Update datasets provenance for a dataset."""
if not self.has_graph_files():
return

datasets_provenance = DatasetProvenance.from_database(database)

if remove:
datasets_provenance.remove_dataset(dataset=dataset, client=self, revision=revision, date=date)
else:
datasets_provenance.update_dataset(dataset=dataset, client=self, revision=revision, date=date)

if commit_database:
database.commit()

def datasets_from_commit(self, commit=None):
"""Return datasets defined in a commit."""
commit = commit or self.repo.head.commit
Expand Down Expand Up @@ -244,7 +226,7 @@ def with_dataset(self, name=None, create=False, immutable=False):
@contextmanager
def with_dataset_provenance(self, database: Database, *, name=None, create=False):
"""Yield a dataset's metadata from dataset provenance."""
datasets_provenance = DatasetProvenance.from_database(database)
datasets_provenance = DatasetsProvenance.from_database(database)
dataset = datasets_provenance.get_by_name(name=name)
clean_up_required = False
dataset_ref = None
Expand Down Expand Up @@ -1059,7 +1041,8 @@ def update_dataset_local_files(self, records, delete=False):

return updated_files, deleted_files

def _update_datasets_metadata(self, updated_files, deleted_files, delete):
@inject.autoparams()
def _update_datasets_metadata(self, updated_files, deleted_files, delete, datasets_provenance: DatasetsProvenance):
modified_datasets = {}

for file_ in updated_files:
Expand All @@ -1076,7 +1059,7 @@ def _update_datasets_metadata(self, updated_files, deleted_files, delete):

for dataset in modified_datasets.values():
dataset.to_yaml()
self.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)

def update_dataset_git_files(self, files, ref, delete=False):
"""Update files and dataset metadata according to their remotes.
Expand Down Expand Up @@ -1201,7 +1184,8 @@ def _calculate_checksum(self, filepath):
except GitCommandError:
return None

def update_external_files(self, records):
@inject.autoparams()
def update_external_files(self, records, datasets_provenance: DatasetsProvenance):
"""Update files linked to external storage."""
updated_files_paths = []
updated_datasets = {}
Expand Down Expand Up @@ -1232,7 +1216,7 @@ def update_external_files(self, records):
file_.update_commit(commit)
dataset.mutate()
dataset.to_yaml()
self.update_datasets_provenance(dataset)
datasets_provenance.add_or_update(dataset)

def _update_pointer_file(self, pointer_file_path):
"""Update a pointer file."""
Expand Down

0 comments on commit 246ffa4

Please sign in to comment.