Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
import tempfile
import time
import urllib
from pathlib import Path

import pytest
import responses
import yaml
from click.testing import CliRunner


Expand Down Expand Up @@ -452,17 +454,24 @@ def renku_cli(*args):
return renku_cli


@pytest.fixture(
    params=[
        {
            'path':
                Path(__file__).parent / 'tests' / 'fixtures' /
                'doi-dataset.yml',
        },
        {
            'path':
                Path(__file__).parent / 'tests' / 'fixtures' /
                'broken-dataset-v0.5.2.yml',
        },
    ]
)
def dataset_metadata(request):
    """Return dataset metadata fixture.

    Parametrized over the stored dataset fixture files, so every test
    that depends on this fixture runs once per metadata sample.
    """
    from renku.core.models.jsonld import NoDatesSafeLoader

    file_path = request.param['path']

    data = yaml.load(file_path.read_text(), Loader=NoDatesSafeLoader)
    yield data


@pytest.fixture()
Expand Down
11 changes: 11 additions & 0 deletions renku/core/commands/checks/migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from renku.core.models.datasets import Dataset
from renku.core.models.refs import LinkReference
from renku.core.utils.urls import url_to_string

from ..echo import WARNING

Expand Down Expand Up @@ -235,9 +236,19 @@ def fix_uncommitted_labels(client):
dataset.to_yaml()


def fix_dataset_files_urls(client):
    """Normalize every dataset file ``url`` to its string representation."""
    for dataset in client.datasets.values():
        for dataset_file in dataset.files:
            dataset_file.url = url_to_string(dataset_file.url)

        # Persist the normalized metadata back to the dataset YAML file.
        dataset.to_yaml()


# Repository-structure migration steps; each entry is a callable taking the
# project ``client``. List order is the order the steps appear in here.
STRUCTURE_MIGRATIONS = [
    ensure_clean_lock,
    migrate_datasets_pre_v0_3,
    migrate_broken_dataset_paths,
    fix_uncommitted_labels,
    fix_dataset_files_urls,
]
8 changes: 7 additions & 1 deletion renku/core/models/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ def _convert_dataset_files_creators(value):
return [Creator.from_jsonld(c) for c in coll]


def convert_filename_path(p):
    """Return the file name component of a path.

    :param p: Path-like value (``str`` or ``Path``); may be ``None`` or
        empty when imported metadata lacks a filename.
    :return: The final path component, or ``None`` for falsy input
        (``Path(None)`` would raise ``TypeError``).
    """
    if not p:
        return None
    return Path(p).name


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please explain how this fix works? If filename is None then no DatasetFile object will be created!?

Copy link
Contributor Author

@jsam jsam Oct 31, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it should be created, but filename field will remain None within the instantiated object.

The raise was happening because `Path(None)` results in a `TypeError`. As for why the value is `None`: most likely something went wrong during the import phase, or we didn't validate the input properly.

@jsonld.s(
type='schema:DigitalDocument',
slots=True,
Expand All @@ -153,7 +159,7 @@ class DatasetFile(Entity, CreatorsMixin):

dataset = jsonld.ib(context='schema:isPartOf', default=None, kw_only=True)

filename = attr.ib(kw_only=True, converter=lambda x: Path(x).name)
filename = attr.ib(kw_only=True, converter=convert_filename_path)

name = jsonld.ib(context='schema:name', kw_only=True, default=None)

Expand Down
41 changes: 41 additions & 0 deletions renku/core/utils/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
#
# Copyright 2019 - Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helpers utils for handling URLs."""

from urllib.parse import ParseResult


def url_to_string(url):
    """Convert url from ``list`` or ``ParseResult`` to string.

    :param url: A ``str``, a ``ParseResult``, or a
        ``[scheme, netloc, path, ...]`` list (the shape a serialized
        ``ParseResult`` deserializes to).
    :return: The URL as a plain string.
    :raises ValueError: If ``url`` is none of the supported types.
    """
    if isinstance(url, str):
        return url

    if isinstance(url, ParseResult):
        return url.geturl()

    if isinstance(url, list):
        # ParseResult components are documented as ``str``; use empty
        # strings (not None) for the parts the list form does not carry.
        return ParseResult(
            scheme=url[0],
            netloc=url[1],
            path=url[2],
            params='',
            query='',
            fragment='',
        ).geturl()

    raise ValueError('url value not recognized')
14 changes: 14 additions & 0 deletions tests/cli/test_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from renku import LocalClient
from renku.cli import cli
from renku.core.management.config import RENKU_HOME
from renku.core.models.datasets import Dataset
from renku.core.utils.urls import url_to_string


@pytest.mark.migration
Expand Down Expand Up @@ -161,3 +163,15 @@ def test_migrations_run_once(isolated_runner, old_project):

result = isolated_runner.invoke(cli, ['migrate', 'datasets'])
assert 1 == result.exit_code


@pytest.mark.migration
def test_migration_broken_urls(dataset_metadata):
    """Check that broken dataset file URLs convert to plain strings."""
    dataset = Dataset.from_jsonld(dataset_metadata, client=LocalClient('.'))

    for dataset_file in dataset.files:
        assert isinstance(url_to_string(dataset_file.url), str)
40 changes: 28 additions & 12 deletions tests/core/commands/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@
"""Serialization tests for renku models."""

import datetime
from urllib.parse import quote, urljoin

import yaml

from renku.core.management.client import LocalClient
from renku.core.models.datasets import Dataset


def test_dataset_serialization(client, dataset, data_file):
"""Test Dataset serialization."""
Expand Down Expand Up @@ -71,20 +75,32 @@ def test_dataset_deserialization(client, dataset):
assert type(creator.get(attribute)) is type_


def test_dataset_doi_metadata(dataset_metadata):
    """Check dataset metadata for correct DOI."""
    from renku.core.utils.doi import is_doi

    dataset = Dataset.from_jsonld(
        dataset_metadata,
        client=LocalClient('.'),
    )

    # Only DOI-identified samples carry a ``same_as`` pointing at doi.org.
    if is_doi(dataset.identifier):
        assert urljoin(
            'https://doi.org', dataset.identifier
        ) == dataset.same_as

    assert dataset._id.endswith(
        'datasets/{}'.format(quote(dataset.identifier, safe=''))
    )


def test_dataset_files_empty_metadata(dataset_metadata):
    """Check parsing metadata of dataset files with empty filename."""
    dataset = Dataset.from_jsonld(
        dataset_metadata,
        client=LocalClient('.'),
    )

    # ``filename`` may be ``None`` when the source metadata had no path;
    # parsing must not raise in that case.
    files = [file.filename for file in dataset.files if not file.filename]

    if files:
        assert None in files
Loading