Skip to content

Commit

Permalink
Merge branch 'master' into 1283-show-lfs-files
Browse files Browse the repository at this point in the history
  • Loading branch information
Panaetius committed Oct 14, 2020
2 parents b4852cf + 197327a commit 604d4bc
Show file tree
Hide file tree
Showing 28 changed files with 405 additions and 228 deletions.
16 changes: 16 additions & 0 deletions conftest.py
Expand Up @@ -362,6 +362,22 @@ def request_callback(request):
yield rsps


@pytest.fixture
def missing_kg_project_responses():
    """Mock the knowledge graph so that every project lookup answers 404.

    GET requests whose URL matches the ``knowledge-graph/projects`` pattern
    are answered by a callback returning status 404 with a JSON-encoded
    ``{"message": "no project found"}`` body.  The two dataset URL patterns
    are registered as passthrough on the mock.

    Yields:
        The active ``responses.RequestsMock``; it is created with
        ``assert_all_requests_are_fired=False``.
    """
    project_urls = re.compile("http(s)*://dev.renku.ch/knowledge-graph/projects/.*")
    passthrough_urls = (
        re.compile("http(s)*://dev.renku.ch/datasets/.*"),
        re.compile("http(s)*://dev.renku.ch/knowledge-graph/datasets/.*"),
    )

    def request_callback(request):
        # The incoming request is ignored: every project query is "missing".
        body = json.dumps({"message": "no project found"})
        headers = {"Content-Type": "application/text"}
        return (404, headers, body)

    with responses.RequestsMock(assert_all_requests_are_fired=False) as mock:
        mock.add_callback(responses.GET, project_urls, callback=request_callback)
        for url_pattern in passthrough_urls:
            mock.add_passthru(url_pattern)
        yield mock


@pytest.fixture()
def directory_tree(tmp_path):
"""Create a test directory tree."""
Expand Down
2 changes: 1 addition & 1 deletion renku/cli/exception_handler.py
Expand Up @@ -109,7 +109,7 @@ def __init__(self, *args, **kwargs):
if HAS_SENTRY:
import sentry_sdk

sentry_sdk.init()
sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"), environment=os.getenv("SENTRY_ENV"))

def main(self, *args, **kwargs):
"""Catch all exceptions."""
Expand Down
3 changes: 2 additions & 1 deletion renku/core/commands/graph.py
Expand Up @@ -31,6 +31,7 @@
from renku.core.models.provenance.activities import Activity, ProcessRun, Usage, WorkflowRun
from renku.core.models.provenance.qualified import Generation
from renku.core.models.workflow.run import Run
from renku.core.utils.scm import git_unicode_unescape


def _safe_path(filepath, can_be_cwl=False):
Expand Down Expand Up @@ -527,7 +528,7 @@ def build_graph(client, revision, no_output, paths):

commit = client.repo.rev_parse(stop)
paths = (
str(client.path / item.a_path)
str(client.path / git_unicode_unescape(item.a_path))
for item in commit.diff(commit.parents or NULL_TREE)
# if not item.deleted_file
)
Expand Down
25 changes: 20 additions & 5 deletions renku/core/commands/providers/renku.py
Expand Up @@ -55,9 +55,17 @@ def find_record(self, uri, client=None):
same_as, kg_urls = self._get_dataset_info(uri)
project_url = None
failed_urls = []
non_existing_projects = []

for kg_url in kg_urls:
kg_datasets_url, ssh_url, https_url = self._get_project_urls(kg_url)
try:
kg_datasets_url, ssh_url, https_url = self._get_project_urls(kg_url)
except errors.OperationError as e:
# NOTE: Project was likely deleted, but still referenced in the KG
if "project not found" not in str(e):
raise
non_existing_projects.append(kg_url)
continue

# Check if the project contains the dataset
if same_as is None: # Dataset is in the project
Expand Down Expand Up @@ -85,6 +93,12 @@ def find_record(self, uri, client=None):
if project_url is None:
if failed_urls:
message = "Cannot clone remote projects:\n\t" + "\n\t".join(failed_urls)
elif non_existing_projects:
raise errors.ProjectNotFound(
"Cannot find these projects in the knowledge graph:\n\t{}".format(
"\n\t".join(non_existing_projects)
)
)
else:
message = "Cannot find any project for the dataset."

Expand Down Expand Up @@ -178,11 +192,12 @@ def _query_knowledge_graph(url):
try:
response = requests.get(url)
except urllib.error.HTTPError as e:
raise errors.OperationError("Cannot access knowledge graph: {}".format(url)) from e
raise errors.OperationError(f"Cannot access knowledge graph: {url}") from e
if response.status_code != 200:
raise errors.OperationError(
"Cannot access knowledge graph: {}\nResponse code: {}".format(url, response.status_code)
)
if response.status_code == 404:
raise errors.OperationError(f"Cannot access knowledge graph: {url}, project not found")

raise errors.OperationError(f"Cannot access knowledge graph: {url}\nResponse code: {response.status_code}")

return response.json()

Expand Down
4 changes: 4 additions & 0 deletions renku/core/errors.py
Expand Up @@ -196,6 +196,10 @@ def __init__(self):
)


class ProjectNotFound(RenkuException):
    """Raise when one or more projects couldn't be found in the KG.

    The Renku dataset provider's ``find_record`` raises it when no project URL
    could be resolved, no clone failure was recorded, and at least one
    knowledge-graph project lookup reported "project not found".  The message
    lists the affected knowledge-graph URLs.
    """


class NothingToCommit(RenkuException):
"""Raise when there is nothing to commit."""

Expand Down
32 changes: 15 additions & 17 deletions renku/core/management/datasets.py
Expand Up @@ -20,6 +20,7 @@
import concurrent.futures
import os
import re
import shlex
import shutil
import tempfile
import time
Expand Down Expand Up @@ -677,23 +678,19 @@ def _get_src_and_dst(path, repo_path, sources, dst_root, used_sources):
def _fetch_lfs_files(repo_path, paths):
"""Fetch and checkout paths that are tracked by Git LFS."""
repo_path = str(repo_path)
try:
output = run(("git", "lfs", "ls-files", "--name-only"), stdout=PIPE, cwd=repo_path, universal_newlines=True)
except SubprocessError:
return

lfs_files = set(output.stdout.split("\n"))
files = lfs_files & paths
if not files:
return

try:
for path in files:
run(["git", "lfs", "pull", "--include", path], cwd=repo_path)
includes = ",".join(shlex.quote(p) for p in paths)
status = run(
["git", "lfs", "pull", "--include", includes], stderr=PIPE, cwd=repo_path, universal_newlines=True
)
if status.returncode != 0:
message = "\n\t".join(status.stderr.split("\n"))
raise errors.GitError(f"Cannot pull LFS objects from server: {message}")
except KeyboardInterrupt:
raise
except SubprocessError:
pass
except SubprocessError as e:
raise errors.GitError(f"Cannot pull LFS objects from server: {e}")

@staticmethod
def _fetch_files_metadata(client, paths):
Expand Down Expand Up @@ -955,7 +952,8 @@ def _update_pointer_file(self, pointer_file_path):
os.remove(pointer_file_path)
return self._create_pointer_file(target, checksum=checksum)

def remove_file(self, filepath):
@staticmethod
def remove_file(filepath):
"""Remove a file/symlink and its pointer file (for external files)."""
path = Path(filepath)
try:
Expand Down Expand Up @@ -1003,15 +1001,15 @@ def prepare_git_repo(self, url, ref=None):
if not url:
raise errors.GitError("Invalid URL.")

RENKU_BRANCH = "renku-default-branch"
renku_branch = "renku-default-branch"

def checkout(repo, ref):
try:
repo.git.checkout(ref)
except GitCommandError:
raise errors.ParameterError('Cannot find reference "{}" in Git repository: {}'.format(ref, url))

ref = ref or RENKU_BRANCH
ref = ref or renku_branch
u = GitURL.parse(url)
path = u.pathname
if u.hostname == "localhost":
Expand Down Expand Up @@ -1048,7 +1046,7 @@ def checkout(repo, ref):
# Because the name of the default branch is not always 'master', we
# create an alias of the default branch when cloning the repo. It
# is used to refer to the default branch later.
renku_ref = "refs/heads/" + RENKU_BRANCH
renku_ref = "refs/heads/" + renku_branch
try:
repo.git.execute(["git", "symbolic-ref", renku_ref, repo.head.reference.path])
checkout(repo, ref)
Expand Down
18 changes: 11 additions & 7 deletions renku/core/management/git.py
Expand Up @@ -33,6 +33,7 @@
import git

from renku.core import errors
from renku.core.utils.scm import git_unicode_unescape
from renku.core.utils.urls import remove_credentials

COMMIT_DIFF_STRATEGY = "DIFF"
Expand Down Expand Up @@ -213,8 +214,9 @@ def ensure_unstaged(self, path):
staged = self.repo.index.diff("HEAD")

for file_path in staged:
is_parent = str(file_path.a_path).startswith(path)
is_equal = path == file_path.a_path
unescaped_path = git_unicode_unescape(file_path.a_path)
is_parent = str(unescaped_path).startswith(path)
is_equal = path == unescaped_path

if is_parent or is_equal:
raise errors.DirtyRenkuDirectory(self.repo)
Expand All @@ -240,9 +242,9 @@ def commit(self, commit_only=None, commit_empty=True, raise_if_empty=False, comm
diff_before = set()

if commit_only == COMMIT_DIFF_STRATEGY:
staged = {item.a_path for item in self.repo.index.diff(None)}
staged = {git_unicode_unescape(item.a_path) for item in self.repo.index.diff(None)}

modified = {item.a_path for item in self.repo.index.diff("HEAD")}
modified = {git_unicode_unescape(item.a_path) for item in self.repo.index.diff("HEAD")}

if staged or modified:
self.repo.git.reset()
Expand All @@ -269,10 +271,12 @@ def commit(self, commit_only=None, commit_empty=True, raise_if_empty=False, comm

if commit_only == COMMIT_DIFF_STRATEGY:
# Get diff generated in command.
change_types = {item.a_path: item.change_type for item in self.repo.index.diff(None)}
change_types = {git_unicode_unescape(item.a_path): item.change_type for item in self.repo.index.diff(None)}
staged_after = set(change_types.keys())

modified_after_change_types = {item.a_path: item.change_type for item in self.repo.index.diff("HEAD")}
modified_after_change_types = {
git_unicode_unescape(item.a_path): item.change_type for item in self.repo.index.diff("HEAD")
}

modified_after = set(modified_after_change_types.keys())

Expand All @@ -294,7 +298,7 @@ def commit(self, commit_only=None, commit_empty=True, raise_if_empty=False, comm

diffs = []
try:
diffs = [d.a_path for d in self.repo.index.diff("HEAD")]
diffs = [git_unicode_unescape(d.a_path) for d in self.repo.index.diff("HEAD")]
if project_metadata_path in diffs:
diffs.remove(project_metadata_path)
except git.exc.BadName:
Expand Down
2 changes: 1 addition & 1 deletion renku/core/management/migrate.py
Expand Up @@ -37,7 +37,7 @@
from renku.core.errors import MigrationRequired, ProjectNotSupported
from renku.core.utils.migrate import read_project_version

SUPPORTED_PROJECT_VERSION = 7
SUPPORTED_PROJECT_VERSION = 8


def check_for_migration(client):
Expand Down
4 changes: 4 additions & 0 deletions renku/core/management/migrations/m_0003__1_jsonld.py
Expand Up @@ -79,6 +79,10 @@ def _apply_on_the_fly_jsonld_migrations(
):
data = read_yaml(path)

if not isinstance(data, dict) and not isinstance(data, list):
# NOTE: metadata file is probably not an actual renku file
return

if jsonld_translate:
# perform the translation
data = pyld.jsonld.expand(data)
Expand Down
3 changes: 2 additions & 1 deletion renku/core/management/migrations/m_0005__2_cwl.py
Expand Up @@ -36,6 +36,7 @@
from renku.core.models.provenance.agents import Person, SoftwareAgent
from renku.core.models.workflow.parameters import CommandArgument, CommandInput, CommandOutput, MappedIOStream
from renku.core.models.workflow.run import Run
from renku.core.utils.scm import git_unicode_unescape
from renku.version import __version__, version_url

default_missing_software_agent = SoftwareAgent(
Expand Down Expand Up @@ -365,7 +366,7 @@ def _invalidations_from_commit(client, commit):
# in this backwards diff
if file_.change_type != "A":
continue
path_ = Path(file_.a_path)
path_ = Path(git_unicode_unescape(file_.a_path))
entity = _get_activity_entity(client, commit, path_, collections, deleted=True)

results.append(entity)
Expand Down
30 changes: 30 additions & 0 deletions renku/core/management/migrations/m_0008__blank_node_id.py
@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 - Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Dataset metadata migrations."""

from renku.core.management.migrations.models.v8 import get_client_datasets


def migrate(client):
    """Entry point of this migration.

    Delegates all work to ``_fix_dataset_metadata`` and returns ``None``.

    Args:
        client: Object handed on unchanged to ``_fix_dataset_metadata``.
    """
    _fix_dataset_metadata(client)


def _fix_dataset_metadata(client):
    """Call ``to_yaml()`` on every dataset of ``client``.

    Datasets are obtained through the v8 migration model helper
    ``get_client_datasets``.

    NOTE(review): presumably re-serializing each dataset is what rewrites the
    blank node ids this migration module is named after -- confirm against
    the v8 models.
    """
    datasets = get_client_datasets(client)
    for ds in datasets:
        ds.to_yaml()

0 comments on commit 604d4bc

Please sign in to comment.