Skip to content

Commit

Permalink
fix: Fixes sameAs property to follow schema.org spec (#944)
Browse files Browse the repository at this point in the history
* fix: Fixes sameAs property to follow schema.org spec

* Adds SHACL validation

* Adds explicit sameAs migration

Co-authored-by: Rok Roškar <rokroskar@gmail.com>
  • Loading branch information
Panaetius and rokroskar committed Jan 30, 2020
1 parent 1803e1e commit 291380e
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 12 deletions.
6 changes: 5 additions & 1 deletion renku/cli/log.py
Expand Up @@ -91,7 +91,11 @@


@click.command()
@click.option('--revision', default='HEAD')
@click.option(
'--revision',
default='HEAD',
help='The git revision to generate the log for, default: HEAD'
)
@click.option(
'--format',
type=click.Choice(FORMATS),
Expand Down
8 changes: 5 additions & 3 deletions renku/core/commands/dataset.py
Expand Up @@ -38,7 +38,8 @@
MigrationRequired, OperationError, ParameterError, UsageError
from renku.core.management.datasets import DATASET_METADATA_PATHS
from renku.core.management.git import COMMIT_DIFF_STRATEGY
from renku.core.models.datasets import Dataset, generate_default_short_name
from renku.core.models.datasets import Dataset, Url, \
generate_default_short_name
from renku.core.models.provenance.agents import Person
from renku.core.models.refs import LinkReference
from renku.core.models.tabulate import tabulate
Expand Down Expand Up @@ -239,8 +240,9 @@ def add_to_dataset(
with_metadata.files = dataset.files

if is_doi(with_metadata.identifier):
dataset.same_as = urllib.parse.urljoin(
'https://doi.org', with_metadata.identifier
dataset.same_as = Url(
url=urllib.parse.
urljoin('https://doi.org', with_metadata.identifier)
)

dataset.update_metadata(with_metadata)
Expand Down
28 changes: 27 additions & 1 deletion renku/core/models/datasets.py
Expand Up @@ -46,6 +46,30 @@
)


@jsonld.s(
    type='schema:URL',
    context={
        'schema': 'http://schema.org/',
    },
)
class Url:
    """A ``schema:URL`` node that wraps a single URL value.

    When no ``_id`` is supplied, one of the form ``_:URL@<suffix>`` is
    generated: the suffix is ``url`` with its scheme blanked out, or a
    random UUID4 string when ``url`` is empty or ``None``.
    """

    # The referenced URL; mapped to ``schema:url`` in the JSON-LD context.
    url = jsonld.ib(default=None, kw_only=True, context='schema:url')

    # JSON-LD ``@id`` of the node; falls back to ``default_id`` below.
    _id = jsonld.ib(kw_only=True, context='@id')

    @_id.default
    def default_id(self):
        """Return the default ``@id`` derived from ``url`` or a UUID4."""
        if not self.url:
            # No URL to derive an identifier from: use a random one.
            suffix = str(uuid.uuid4())
        else:
            # Rebuild the URL with an empty scheme, keeping all other parts.
            url_parts = urllib.parse.urlparse(self.url)
            schemeless = urllib.parse.ParseResult('', *url_parts[1:])
            suffix = schemeless.geturl()
        return '_:URL@{0}'.format(suffix)


def _convert_creators(value):
"""Convert creators."""
if isinstance(value, dict): # compatibility with previous versions
Expand Down Expand Up @@ -349,7 +373,9 @@ class Dataset(Entity, CreatorMixin):
kw_only=True
)

same_as = jsonld.ib(context='schema:sameAs', default=None, kw_only=True)
same_as = jsonld.ib(
context='schema:sameAs', default=None, kw_only=True, type=Url
)

short_name = jsonld.ib(
default=None, context='schema:alternateName', kw_only=True
Expand Down
8 changes: 6 additions & 2 deletions renku/core/models/migrations/__init__.py
Expand Up @@ -18,9 +18,13 @@
"""Renku JSON-LD migrations."""

from renku.core.models.migrations.dataset import migrate_absolute_paths, \
migrate_dataset_schema, migrate_doi_identifier
migrate_dataset_schema, migrate_doi_identifier, \
migrate_same_as_structure

JSONLD_MIGRATIONS = {
'dctypes:Dataset': [migrate_dataset_schema, migrate_absolute_paths],
'schema:Dataset': [migrate_absolute_paths, migrate_doi_identifier],
'schema:Dataset': [
migrate_absolute_paths, migrate_doi_identifier,
migrate_same_as_structure
],
}
37 changes: 35 additions & 2 deletions renku/core/models/migrations/dataset.py
Expand Up @@ -80,8 +80,41 @@ def migrate_doi_identifier(data):
if not is_uuid(identifier):
data['identifier'] = str(uuid.uuid4())
if is_doi(data.get('_id', '')):
data['same_as'] = data['_id']
data['same_as'] = {'@type': ['schema:URL'], 'url': data['_id']}
if data.get('@context'):
data['@context'].setdefault('same_as', 'schema:sameAs')
data['@context'].setdefault(
'same_as', {
'@id': 'schema:sameAs',
'@type': 'schema:URL',
'@context': {
'@version': '1.1',
'url': 'schema:url',
'schema': 'http://schema.org/'
}
}
)
data['_id'] = data['identifier']
return data


def migrate_same_as_structure(data):
    """Convert a plain-string ``same_as`` into a ``schema:URL`` object.

    If ``data['same_as']`` is a non-empty string it is replaced in place by
    ``{'@type': ['schema:URL'], 'url': <string>}``. When ``data`` also has
    a non-empty ``@context``, a term definition for ``same_as`` is added to
    it unless one is already present. Any other input is left untouched.

    :param data: dataset metadata dictionary; modified in place.
    :returns: the same ``data`` object.
    """
    legacy_value = data.get('same_as')

    # Nothing to migrate: missing, empty, or already a structured value.
    if not (legacy_value and isinstance(legacy_value, str)):
        return data

    data['same_as'] = {'@type': ['schema:URL'], 'url': legacy_value}

    context = data.get('@context')
    if context:
        same_as_term = {
            '@id': 'schema:sameAs',
            '@type': 'schema:URL',
            '@context': {
                '@version': '1.1',
                'url': 'schema:url',
                'schema': 'http://schema.org/'
            }
        }
        # ``setdefault`` keeps an existing ``same_as`` definition as is.
        context.setdefault('same_as', same_as_term)

    return data
32 changes: 30 additions & 2 deletions renku/data/shacl_shape.json
Expand Up @@ -323,8 +323,8 @@
{
"nodeKind": "sh:Literal",
"path": "schema:sameAs",
"datatype": {
"@id": "xsd:string"
"sh:class": {
"@id": "schema:URL"
}
},
{
Expand Down Expand Up @@ -390,6 +390,34 @@
}
]
},
{
"@id": "_:URLShape",
"@type": "sh:NodeShape",
"ignoredProperties": [
{
"@id": "rdf:type"
}
],
"closed": true,
"targetClass": "schema:URL",
"property": [
{
"path": "schema:url",
"or": [
{
"nodeKind": "sh:Literal",
"datatype": {
"@id": "xsd:string"
}
},
{
"nodeKind": "sh:IRI"
}
],
"maxCount": 1
}
]
},
{
"@id": "_:inLanguageShape",
"@type": "sh:NodeShape",
Expand Down
3 changes: 2 additions & 1 deletion tests/core/commands/test_serialization.py
Expand Up @@ -98,7 +98,8 @@ def test_uuid_migration(dataset_metadata, client):
assert urljoin(
'https://localhost/datasets/', dataset.identifier
) == dataset._id
assert dataset.same_as.startswith('https://doi.org')

assert dataset.same_as.url.startswith('https://doi.org')


def test_dataset_creator_email(dataset_metadata):
Expand Down

0 comments on commit 291380e

Please sign in to comment.