Skip to content

Commit 44183e6

Browse files
rokroskar authored and jsam committed
fix: user-related metadata (#655)
* fix: explicitly add nested metadata fields
* fix: set the blank node correctly
* chore(tests): add basic dataset serialization tests
* fix: omit "mailto" from email
1 parent 4d4d7d2 commit 44183e6

File tree

5 files changed

+88
-32
lines changed

5 files changed

+88
-32
lines changed

conftest.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,12 @@ def client(project):
182182
def dataset(client):
183183
"""Create a dataset."""
184184
with client.with_dataset(name='dataset') as dataset:
185-
dataset.author = {
186-
'name': 'me',
185+
dataset.creator = [{
186+
'affiliation': 'xxx',
187187
'email': 'me@example.com',
188-
}
188+
'_id': 'me_id',
189+
'name': 'me',
190+
}]
189191
return dataset
190192

191193

renku/models/datasets.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,13 +128,13 @@ def default_id(self):
128128
"""Set the default id."""
129129
if self.email:
130130
return 'mailto:{email}'.format(email=self.email)
131-
return '_' + str(uuid.uuid4())
131+
return '_:{}'.format(str(uuid.uuid4()))
132132

133133
def __attrs_post_init__(self):
134134
"""Finish object initialization."""
135135
# handle the case where ids were improperly set
136136
if self._id == 'mailto:None':
137-
self._id = '_' + str(uuid.uuid4())
137+
self._id = self.default_id()
138138

139139

140140
@attr.s
@@ -289,6 +289,10 @@ def _convert_keyword(keywords):
289289
@jsonld.s(
290290
type='schema:Dataset',
291291
context={
292+
'added': 'schema:dateCreated',
293+
'affiliation': 'schema:affiliation',
294+
'alternate_name': 'schema:alternateName',
295+
'email': 'schema:email',
292296
'schema': 'http://schema.org/',
293297
},
294298
)
@@ -317,7 +321,7 @@ class Dataset(Entity, CreatorsMixin):
317321
)
318322

319323
description = jsonld.ib(
320-
default=None, context='schema:description', kw_only=True
324+
default='', context='schema:description', kw_only=True
321325
)
322326

323327
identifier = jsonld.ib(

renku/models/provenance/agents.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,14 @@ class Person:
3939
"""Represent a person."""
4040

4141
name = jsonld.ib(context='rdfs:label')
42-
email = jsonld.ib(context={
43-
'@type': '@id',
44-
'@id': 'schema:email',
45-
})
42+
email = jsonld.ib(context='schema:email')
4643

4744
_id = jsonld.ib(context='@id', init=False, kw_only=True)
4845

4946
@_id.default
5047
def default_id(self):
5148
"""Configure calculated ID."""
52-
return self.email
49+
return 'mailto:{0}'.format(self.email)
5350

5451
@email.validator
5552
def check_email(self, attribute, value):
@@ -62,7 +59,7 @@ def from_commit(cls, commit):
6259
"""Create an instance from a Git commit."""
6360
return cls(
6461
name=commit.author.name,
65-
email='mailto:{0}'.format(commit.author.email),
62+
email=commit.author.email,
6663
)
6764

6865

@@ -79,7 +76,7 @@ def from_commit(cls, commit):
7976
slots=True,
8077
)
8178
class SoftwareAgent:
82-
"""Represent a person."""
79+
"""Represent executed software."""
8380

8481
label = jsonld.ib(context='rdfs:label', kw_only=True)
8582
was_started_by = jsonld.ib(

tests/test_dataset.py

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@
2424

2525
import git
2626
import pytest
27-
import yaml
2827

29-
from renku.models.datasets import Creator, Dataset, DatasetFile
28+
from renku.models.datasets import Creator, DatasetFile
3029

3130

3231
def _key(client, dataset, filename):
@@ -121,23 +120,6 @@ def test_data_add_recursive(directory_tree, client):
121120
) == 'dir2'
122121

123122

124-
def dataset_serialization(client, dataset, data_file):
125-
"""Test deserializing a dataset object."""
126-
with open(dataset.path / 'metadata.yml', 'r') as f:
127-
source = yaml.safe_load(f)
128-
129-
dataset = Dataset.from_jsonld(source)
130-
assert dataset.path == dataset.path
131-
132-
d_dict = dataset.to_dict()
133-
134-
assert all([key in d_dict for key in ('name', 'identifier', 'files')])
135-
assert not len(d_dict['files'].values())
136-
client.add_data_to_dataset(dataset, [str(data_file)])
137-
d_dict = dataset.to_dict()
138-
assert len(d_dict['files'].values())
139-
140-
141123
def test_git_repo_import(client, dataset, tmpdir, data_repository):
142124
"""Test an import from a git repository."""
143125
# add data from local repo

tests/test_serialization.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright 2017-2019- Swiss Data Science Center (SDSC)
4+
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
5+
# Eidgenössische Technische Hochschule Zürich (ETHZ).
6+
#
7+
# Licensed under the Apache License, Version 2.0 (the "License");
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
"""Serialization tests for renku models."""
19+
20+
import datetime
21+
22+
import yaml
23+
24+
25+
def test_dataset_serialization(client, dataset, data_file):
26+
"""Test Dataset serialization."""
27+
28+
def load_dataset(name):
29+
with open(str(client.dataset_path(name))) as f:
30+
return yaml.safe_load(f)
31+
32+
d_dict = load_dataset('dataset')
33+
34+
expected_fields = [
35+
'_id', '_label', '_project', 'created', 'creator', 'date_published',
36+
'description', 'files', 'identifier', 'in_language', 'keywords',
37+
'license', 'name', 'path', 'url', 'version'
38+
]
39+
for field in expected_fields:
40+
assert field in d_dict
41+
42+
assert not d_dict['files']
43+
client.add_data_to_dataset(dataset, [str(data_file)])
44+
dataset.to_yaml()
45+
d_dict = load_dataset('dataset')
46+
assert d_dict['files']
47+
48+
49+
def test_dataset_deserialization(client, dataset):
50+
"""Test Dataset deserialization."""
51+
from renku.models.datasets import Dataset
52+
dataset_ = Dataset.from_yaml(client.dataset_path('dataset'), client=client)
53+
54+
dataset_types = {
55+
'created': datetime.datetime,
56+
'creator': list,
57+
'description': str,
58+
'files': list,
59+
'identifier': str,
60+
'keywords': list,
61+
}
62+
63+
for attribute, type_ in dataset_types.items():
64+
assert type(dataset_.__getattribute__(attribute)) is type_
65+
66+
creator_types = {'email': str, '_id': str, 'name': str, 'affiliation': str}
67+
68+
creator = dataset.creator[0]
69+
70+
for attribute, type_ in creator_types.items():
71+
assert type(creator.get(attribute)) is type_

0 commit comments

Comments
 (0)