Skip to content

Commit

Permalink
feat(api): project status
Browse files Browse the repository at this point in the history
  • Loading branch information
m-alisafaee committed May 10, 2022
1 parent 396651a commit 6b17133
Show file tree
Hide file tree
Showing 11 changed files with 512 additions and 343 deletions.
69 changes: 2 additions & 67 deletions renku/command/status.py
Expand Up @@ -17,75 +17,10 @@
# limitations under the License.
"""Renku ``status`` command."""

from collections import defaultdict
from pathlib import Path
from typing import Dict, Set

from renku.command.command_builder import inject
from renku.command.command_builder.command import Command
from renku.core.interface.client_dispatcher import IClientDispatcher
from renku.core.util.os import get_relative_path_to_cwd, get_relative_paths
from renku.core.workflow.activity import (
get_all_modified_and_deleted_activities_and_entities,
get_downstream_generating_activities,
is_activity_valid,
)
from renku.core.workflow.run import get_status


def get_status_command():
    """Build the command that reports the status of the repository.

    Returns:
        The ``Command`` wrapping ``_get_status`` after ``require_migration()`` and
        ``with_database(write=False)`` have been applied to it.
    """
    status_command = Command().command(_get_status)
    status_command = status_command.require_migration()
    return status_command.with_database(write=False)


@inject.autoparams()
def _get_status(ignore_deleted: bool, client_dispatcher: IClientDispatcher, paths=None):
    """Collect stale outputs, stale activities, modified inputs and deleted inputs of the current project.

    Args:
        ignore_deleted(bool): Forwarded to ``get_downstream_generating_activities``; it is also switched on when
            the ``update_ignore_delete`` value of the ``renku`` config section is truthy.
        client_dispatcher(IClientDispatcher): Injected client dispatcher.
        paths: Optional paths, joined onto the current working directory, that limit the status
            (Default value = None).

    Returns:
        ``(None, None, None, None)`` when no valid activity has a modified or deleted entity. Otherwise a quadruple
        of: a mapping from each stale generation path to the modified usage paths that made it stale, a mapping
        from the id of each stale activity without generations to the modified usage paths, the set of modified
        usage paths, and the set of deleted paths. Reported paths are produced by ``get_relative_path_to_cwd``.
    """
    client = client_dispatcher.current_client

    ignore_deleted = ignore_deleted or client.get_value("renku", "update_ignore_delete")

    modified, deleted = get_all_modified_and_deleted_activities_and_entities(client.repository)

    # NOTE: Only entities that belong to valid activities are considered
    modified = {(activity, entity) for activity, entity in modified if is_activity_valid(activity)}
    deleted = {(activity, entity) for activity, entity in deleted if is_activity_valid(activity)}

    if not (modified or deleted):
        return None, None, None, None

    requested_paths = [Path.cwd() / path for path in (paths or [])]
    paths = get_relative_paths(base=client.path, paths=requested_paths)

    modified_inputs: Set[str] = set()
    stale_outputs: Dict[str, Set[str]] = defaultdict(set)
    stale_activities: Dict[str, Set[str]] = defaultdict(set)

    for start_activity, entity in modified:
        usage_path = get_relative_path_to_cwd(client.path / entity.path)

        # NOTE: Add all downstream activities if the modified entity is in paths; otherwise, add only activities
        # that chain-generate at least one of the paths
        if paths and entity.path not in paths:
            generation_paths = paths
        else:
            generation_paths = []

        activities = get_downstream_generating_activities(
            starting_activities={start_activity},
            paths=generation_paths,
            ignore_deleted=ignore_deleted,
            client_path=client.path,
        )
        if activities:
            modified_inputs.add(usage_path)

        for activity in activities:
            if len(activity.generations) == 0:
                # NOTE: Without generations there is no output path to report, so the activity itself is listed
                stale_activities[activity.id].add(usage_path)
                continue

            for generation in activity.generations:
                generation_path = get_relative_path_to_cwd(client.path / generation.entity.path)
                stale_outputs[generation_path].add(usage_path)

    deleted_paths = set()
    for deleted_path in {entity.path for _, entity in deleted}:
        if not paths or deleted_path in paths:
            deleted_paths.add(get_relative_path_to_cwd(client.path / deleted_path))

    return stale_outputs, stale_activities, modified_inputs, deleted_paths
return Command().command(get_status).require_migration().with_database(write=False)
69 changes: 0 additions & 69 deletions renku/core/compat.py

This file was deleted.

2 changes: 1 addition & 1 deletion renku/core/management/repository.py
Expand Up @@ -21,6 +21,7 @@
import shutil
from contextlib import contextmanager
from fnmatch import fnmatch
from pathlib import Path
from typing import Any, Optional
from uuid import uuid4

Expand All @@ -29,7 +30,6 @@

from renku.command.command_builder import inject
from renku.core import errors
from renku.core.compat import Path
from renku.core.constant import RENKU_HOME
from renku.core.interface.database_gateway import IDatabaseGateway
from renku.core.interface.project_gateway import IProjectGateway
Expand Down
14 changes: 14 additions & 0 deletions renku/core/management/storage.py
Expand Up @@ -44,6 +44,20 @@
from .repository import RepositoryApiMixin # type: ignore


class RenkuGitWildMatchPattern(pathspec.patterns.GitWildMatchPattern):
    """``GitWildMatchPattern`` subclass that also stores the pattern it was created from."""

    # NOTE: Declares the slot for the extra ``pattern`` attribute assigned in ``__init__``
    __slots__ = ("pattern",)

    def __init__(self, pattern, include=None):
        """Initialize the parent matcher and store ``pattern`` on the instance.

        Args:
            pattern: Pattern handed to the parent ``GitWildMatchPattern`` initializer.
            include: Forwarded unchanged to the parent initializer (Default value = None).
        """
        super().__init__(pattern, include=include)
        self.pattern = pattern


pathspec.util.register_pattern("renku_gitwildmatch", RenkuGitWildMatchPattern)


def check_external_storage_wrapper(fn):
"""Check availability of external storage on methods that need it.
Expand Down
5 changes: 3 additions & 2 deletions renku/core/workflow/activity.py
Expand Up @@ -15,7 +15,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Renku workflow commands."""
"""Activity management."""

import itertools
from collections import defaultdict
Expand Down Expand Up @@ -374,7 +374,8 @@ def get_modified_activities(
for usage in activity.usages:
entity = usage.entity
current_checksum = hashes.get(entity.path, None)
if current_checksum is None:
usage_path = repository.path / usage.entity.path
if current_checksum is None or not usage_path.exists():
deleted.add((activity, entity))
elif current_checksum != entity.checksum:
modified.add((activity, entity))
Expand Down
101 changes: 101 additions & 0 deletions renku/core/workflow/run.py
@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
#
# Copyright 2018-2022- Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Running workflows logic."""

from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple, Union

from renku.command.command_builder import inject
from renku.core.interface.client_dispatcher import IClientDispatcher
from renku.core.util.os import get_relative_path_to_cwd, get_relative_paths
from renku.core.workflow.activity import (
get_all_modified_and_deleted_activities_and_entities,
get_downstream_generating_activities,
is_activity_valid,
)


@inject.autoparams("client_dispatcher")
def get_status(
    client_dispatcher: IClientDispatcher, paths: Optional[List[Union[Path, str]]] = None, ignore_deleted: bool = False
) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]], Set[str], Set[str]]:
    """Return status of a project.

    Args:
        client_dispatcher(IClientDispatcher): Injected client dispatcher.
        paths(Optional[List[Union[Path, str]]]): Limit the status to this list of paths; each one is joined onto
            the current working directory (Default value = None).
        ignore_deleted(bool): Whether to ignore deleted generations; also switched on when the
            ``update_ignore_delete`` value of the ``renku`` config section is truthy (Default value = False).

    Returns:
        Tuple[Dict[str, Set[str]], Dict[str, Set[str]], Set[str], Set[str]]: A quadruple containing a mapping of stale
            outputs to modified inputs, a mapping of stale activities that have no generation to modified inputs, a
            set of modified inputs, and a set of deleted inputs. All four are empty when no valid activity has a
            modified or deleted entity.
    """
    client = client_dispatcher.current_client

    ignore_deleted = ignore_deleted or client.get_value("renku", "update_ignore_delete")

    modified, deleted = get_all_modified_and_deleted_activities_and_entities(client.repository)

    # NOTE: Only entities that belong to valid activities are considered
    modified = {(activity, entity) for activity, entity in modified if is_activity_valid(activity)}
    deleted = {(activity, entity) for activity, entity in deleted if is_activity_valid(activity)}

    if not (modified or deleted):
        return {}, {}, set(), set()

    requested_paths = [Path.cwd() / path for path in (paths or [])]
    paths = get_relative_paths(base=client.path, paths=requested_paths)  # type: ignore

    modified_inputs: Set[str] = set()
    stale_outputs: Dict[str, Set[str]] = defaultdict(set)
    stale_activities: Dict[str, Set[str]] = defaultdict(set)

    for start_activity, entity in modified:
        usage_path = get_relative_path_to_cwd(client.path / entity.path)

        # NOTE: Add all downstream activities if the modified entity is in paths; otherwise, add only activities
        # that chain-generate at least one of the paths
        if paths and entity.path not in paths:
            generation_paths = paths
        else:
            generation_paths = []

        activities = get_downstream_generating_activities(
            starting_activities={start_activity},
            paths=generation_paths,
            ignore_deleted=ignore_deleted,
            client_path=client.path,
        )
        if activities:
            modified_inputs.add(usage_path)

        for activity in activities:
            if len(activity.generations) == 0:
                # NOTE: Without generations there is no output path to report, so the activity itself is listed
                stale_activities[activity.id].add(usage_path)
                continue

            for generation in activity.generations:
                generation_path = get_relative_path_to_cwd(client.path / generation.entity.path)
                stale_outputs[generation_path].add(usage_path)

    deleted_paths = set()
    for deleted_path in {entity.path for _, entity in deleted}:
        if not paths or deleted_path in paths:
            deleted_paths.add(get_relative_path_to_cwd(client.path / deleted_path))

    return stale_outputs, stale_activities, modified_inputs, deleted_paths
34 changes: 32 additions & 2 deletions renku/ui/api/models/project.py
Expand Up @@ -22,7 +22,7 @@
entities.
Normally, you do not need to create an instance of Project class directly
unless you want to have access to Project metadata (e.g. path). To separate
unless you want to have access to Project metadata (e.g. path) or get its status. To separate
parts of your script that use Renku entities, you can create a Project context
manager and interact with Renku inside it:
Expand All @@ -31,13 +31,25 @@
from renku.ui.api import Project, Input
with Project():
input_1 = Input("data_1")
input_1 = Input("input_1", "path_1")
You can use Project's ``status`` method to get info about outdated outputs and
activities, and modified or deleted inputs:
.. code-block:: python
from renku.ui.api import Project
outdated_generations, outdated_activities, modified_inputs, deleted_inputs = Project().status()
"""
from functools import wraps
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple, Union

from werkzeug.local import LocalStack

from renku.command.status import get_status_command
from renku.core import errors


Expand Down Expand Up @@ -69,6 +81,24 @@ def path(self):
"""Absolute path to project's root directory."""
return self._client.path.resolve()

def status(
    self, paths: Optional[List[Union[Path, str]]] = None, ignore_deleted: bool = False
) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]], Set[str], Set[str]]:
    """Return status of a project.

    Args:
        paths(Optional[List[Union[Path, str]]]): Limit the status to this list of paths (Default value = None).
        ignore_deleted(bool): Whether to ignore deleted generations (Default value = False).

    Returns:
        Tuple[Dict[str, Set[str]], Dict[str, Set[str]], Set[str], Set[str]]: A quadruple containing a mapping of
            stale outputs to modified inputs, a mapping of stale activities that have no generation to modified
            inputs, a set of modified inputs, and a set of deleted inputs.
    """
    # NOTE: Both arguments are handed to the built status command as keyword arguments; its output is returned as is
    status_command = get_status_command().build()
    return status_command.execute(paths=paths, ignore_deleted=ignore_deleted).output


def ensure_project_context(fn):
"""Check existence of a project context.
Expand Down

0 comments on commit 6b17133

Please sign in to comment.