Skip to content

Commit 85585d0

Browse files
committed
fix: ensure all Person instances have valid ids
addresses #812
1 parent 5e0147b commit 85585d0

File tree

11 files changed

+104
-129
lines changed

11 files changed

+104
-129
lines changed

docs/models/datasets.rst

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,3 @@ Manage files in the dataset.
3737
.. autoclass:: renku.core.models.datasets.DatasetFile
3838
:members:
3939
:inherited-members:
40-
41-
42-
Creator
43-
-------
44-
45-
.. autoclass:: renku.core.models.datasets.Creator
46-
:members:
47-
:inherited-members:

renku/cli/exception_handler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,10 @@ def _handle_sentry(self):
134134
with capture_internal_exceptions():
135135
from git import Repo
136136
from renku.core.commands import get_git_home
137-
from renku.core.models.datasets import Creator
137+
from renku.core.models.provenance.agents import Person
138138

139139
repo = Repo(get_git_home())
140-
user = Creator.from_git(repo)
140+
user = Person.from_git(repo)
141141

142142
scope.user = {'name': user.name, 'email': user.email}
143143

renku/core/commands/dataset.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@
4242
MigrationRequired, ParameterError
4343
from renku.core.management.datasets import DATASET_METADATA_PATHS
4444
from renku.core.management.git import COMMIT_DIFF_STRATEGY
45-
from renku.core.models.datasets import Creator, Dataset
45+
from renku.core.models.datasets import Dataset
46+
from renku.core.models.provenance.agents import Person
4647
from renku.core.models.refs import LinkReference
4748
from renku.core.models.tabulate import tabulate
4849
from renku.core.utils.doi import extract_doi
@@ -107,7 +108,7 @@ def create_dataset(client, name, handle_duplicate_fn=None):
107108
existing = client.load_dataset(name=name)
108109
if (not existing or handle_duplicate_fn and handle_duplicate_fn(existing)):
109110
with client.with_dataset(name=name) as dataset:
110-
creator = Creator.from_git(client.repo)
111+
creator = Person.from_git(client.repo)
111112
if creator not in dataset.creator:
112113
dataset.creator.append(creator)
113114

renku/core/management/datasets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@
3535

3636
from renku.core import errors
3737
from renku.core.management.config import RENKU_HOME
38-
from renku.core.models.datasets import Creator, Dataset, DatasetFile, \
39-
DatasetTag
38+
from renku.core.models.datasets import Dataset, DatasetFile, DatasetTag
4039
from renku.core.models.git import GitURL
4140
from renku.core.models.locals import with_reference
41+
from renku.core.models.provenance.agents import Person
4242
from renku.core.models.refs import LinkReference
4343

4444

@@ -391,7 +391,7 @@ def _add_from_git(self, dataset, dataset_path, url, sources, destination):
391391
creators = []
392392
# grab all the creators from the commit history
393393
for commit in repo.iter_commits(paths=path):
394-
creator = Creator.from_commit(commit)
394+
creator = Person.from_commit(commit)
395395
if creator not in creators:
396396
creators.append(creator)
397397

renku/core/management/repository.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,8 @@ def init_repository(self, name=None, force=False):
394394
self.repo.description = name or path.name
395395

396396
# Check that an creator can be determined from Git.
397-
from renku.core.models.datasets import Creator
398-
Creator.from_git(self.repo)
397+
from renku.core.models.provenance.agents import Person
398+
Person.from_git(self.repo)
399399

400400
# TODO read existing gitignore and create a unique set of rules
401401
import pkg_resources

renku/core/models/datasets.py

Lines changed: 13 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
# limitations under the License.
1818
"""Model objects representing datasets."""
1919

20-
import configparser
2120
import datetime
2221
import os
2322
import pathlib
@@ -30,7 +29,6 @@
3029
import attr
3130
from attr.validators import instance_of
3231

33-
from renku.core import errors
3432
from renku.core.models.provenance.agents import Person
3533
from renku.core.models.provenance.entities import Entity
3634
from renku.core.utils.datetime8601 import parse_date
@@ -54,69 +52,24 @@ def _extract_doi(value):
5452
return value
5553

5654

57-
class Creator(Person):
58-
"""Represent the creator of a resource."""
59-
60-
affiliation = jsonld.ib(
61-
default=None, kw_only=True, context='schema:affiliation'
62-
)
63-
64-
alternate_name = jsonld.ib(
65-
default=None, kw_only=True, context='schema:alternateName'
66-
)
67-
68-
@property
69-
def short_name(self):
70-
"""Gives full name in short form."""
71-
names = self.name.split()
72-
if len(names) == 1:
73-
return self.name
74-
75-
last_name = names[-1]
76-
initials = [name[0] for name in names]
77-
initials.pop()
78-
79-
return '{0}.{1}'.format('.'.join(initials), last_name)
80-
81-
@classmethod
82-
def from_git(cls, git):
83-
"""Create an instance from a Git repo."""
84-
git_config = git.config_reader()
85-
try:
86-
name = git_config.get_value('user', 'name', None)
87-
email = git_config.get_value('user', 'email', None)
88-
except (
89-
configparser.NoOptionError, configparser.NoSectionError
90-
): # pragma: no cover
91-
raise errors.ConfigurationError(
92-
'The user name and email are not configured. '
93-
'Please use the "git config" command to configure them.\n\n'
94-
'\tgit config --global --add user.name "John Doe"\n'
95-
'\tgit config --global --add user.email '
96-
'"john.doe@example.com"\n'
97-
)
98-
99-
# Check the git configuration.
100-
if not name: # pragma: no cover
101-
raise errors.MissingUsername()
102-
if not email: # pragma: no cover
103-
raise errors.MissingEmail()
104-
105-
return cls(name=name, email=email)
55+
def _convert_creators(value):
56+
"""Convert creators."""
57+
if isinstance(value, dict): # compatibility with previous versions
58+
return [Person.from_jsonld(value)]
10659

107-
def __attrs_post_init__(self):
108-
"""Finish object initialization."""
109-
# handle the case where ids were improperly set
110-
if self._id == 'mailto:None':
111-
self._id = self.default_id()
60+
if isinstance(value, list):
61+
return [Person.from_jsonld(v) for v in value]
11262

11363

11464
@attr.s
115-
class CreatorsMixin:
65+
class PersonMixin:
11666
"""Mixin for handling creators container."""
11767

11868
creator = jsonld.container.list(
119-
Creator, kw_only=True, context='schema:creator'
69+
Person,
70+
kw_only=True,
71+
context='schema:creator',
72+
converter=_convert_creators
12073
)
12174

12275
@property
@@ -190,34 +143,16 @@ class Language:
190143
name = jsonld.ib(default=None, kw_only=True, context='schema:name')
191144

192145

193-
def _convert_dataset_files_creators(value):
194-
"""Convert dataset files creators."""
195-
coll = value
196-
197-
if isinstance(coll, dict):
198-
return [Creator.from_jsonld(coll)]
199-
200-
if isinstance(coll, list):
201-
return [Creator.from_jsonld(c) for c in coll]
202-
203-
204146
@jsonld.s(
205147
type='schema:DigitalDocument',
206148
slots=True,
207149
context={
208150
'schema': 'http://schema.org/',
209151
}
210152
)
211-
class DatasetFile(Entity, CreatorsMixin):
153+
class DatasetFile(Entity, PersonMixin):
212154
"""Represent a file in a dataset."""
213155

214-
creator = jsonld.container.list(
215-
Creator,
216-
converter=_convert_dataset_files_creators,
217-
kw_only=True,
218-
context='schema:creator'
219-
)
220-
221156
added = jsonld.ib(
222157
converter=parse_date, context='schema:dateCreated', kw_only=True
223158
)
@@ -290,15 +225,6 @@ def _convert_dataset_tags(value):
290225
return [DatasetTag.from_jsonld(v) for v in value]
291226

292227

293-
def _convert_dataset_creator(value):
294-
"""Convert dataset creators."""
295-
if isinstance(value, dict): # compatibility with previous versions
296-
return [Creator.from_jsonld(value)]
297-
298-
if isinstance(value, list):
299-
return [Creator.from_jsonld(v) for v in value]
300-
301-
302228
def _convert_language(obj):
303229
"""Convert language object."""
304230
if isinstance(obj, dict):
@@ -324,7 +250,7 @@ def _convert_keyword(keywords):
324250
'schema': 'http://schema.org/',
325251
},
326252
)
327-
class Dataset(Entity, CreatorsMixin):
253+
class Dataset(Entity, PersonMixin):
328254
"""Repesent a dataset."""
329255

330256
SUPPORTED_SCHEMES = ('', 'file', 'http', 'https', 'git+https', 'git+ssh')
@@ -337,13 +263,6 @@ class Dataset(Entity, CreatorsMixin):
337263
_id = jsonld.ib(default=None, context='@id', kw_only=True)
338264
_label = jsonld.ib(default=None, context='rdfs:label', kw_only=True)
339265

340-
creator = jsonld.container.list(
341-
Creator,
342-
converter=_convert_dataset_creator,
343-
context='schema:creator',
344-
kw_only=True
345-
)
346-
347266
date_published = jsonld.ib(
348267
default=None, context='schema:datePublished', kw_only=True
349268
)

renku/core/models/projects.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
import attr
2424

2525
from renku.core.models import jsonld
26-
from renku.core.models.datasets import Creator
2726
from renku.core.models.datastructures import Collection
27+
from renku.core.models.provenance.agents import Person
2828
from renku.core.utils.datetime8601 import parse_date
2929

3030
PROJECT_URL_PATH = 'projects'
@@ -89,14 +89,14 @@ def __attrs_post_init__(self):
8989
"""Initialize computed attributes."""
9090
if not self.creator and self.client:
9191
if self.client.renku_metadata_path.exists():
92-
self.creator = Creator.from_commit(
92+
self.creator = Person.from_commit(
9393
self.client.find_previous_commit(
9494
self.client.renku_metadata_path, return_first=True
9595
),
9696
)
9797
else:
9898
# this assumes the project is being newly created
99-
self.creator = Creator.from_git(self.client.repo)
99+
self.creator = Person.from_git(self.client.repo)
100100

101101
self._id = self.project_id
102102

renku/core/models/provenance/agents.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
# limitations under the License.
1818
"""Represent provenance agents."""
1919

20+
import configparser
2021
import re
2122
import uuid
2223

24+
from renku.core import errors
2325
from renku.core.models import jsonld as jsonld
2426
from renku.version import __version__, version_url
2527

@@ -33,7 +35,6 @@
3335
'schema': 'http://schema.org/',
3436
'prov': 'http://www.w3.org/ns/prov#',
3537
},
36-
frozen=True,
3738
slots=True,
3839
)
3940
class Person:
@@ -42,7 +43,12 @@ class Person:
4243
name = jsonld.ib(context='schema:name')
4344
email = jsonld.ib(context='schema:email', default=None, kw_only=True)
4445
label = jsonld.ib(context='rdfs:label', kw_only=True)
45-
46+
affiliation = jsonld.ib(
47+
default=None, kw_only=True, context='schema:affiliation'
48+
)
49+
alternate_name = jsonld.ib(
50+
default=None, kw_only=True, context='schema:alternateName'
51+
)
4652
_id = jsonld.ib(context='@id', kw_only=True)
4753

4854
@_id.default
@@ -73,6 +79,51 @@ def from_commit(cls, commit):
7379
email=commit.author.email,
7480
)
7581

82+
@property
83+
def short_name(self):
84+
"""Gives full name in short form."""
85+
names = self.name.split()
86+
if len(names) == 1:
87+
return self.name
88+
89+
last_name = names[-1]
90+
initials = [name[0] for name in names]
91+
initials.pop()
92+
93+
return '{0}.{1}'.format('.'.join(initials), last_name)
94+
95+
@classmethod
96+
def from_git(cls, git):
97+
"""Create an instance from a Git repo."""
98+
git_config = git.config_reader()
99+
try:
100+
name = git_config.get_value('user', 'name', None)
101+
email = git_config.get_value('user', 'email', None)
102+
except (
103+
configparser.NoOptionError, configparser.NoSectionError
104+
): # pragma: no cover
105+
raise errors.ConfigurationError(
106+
'The user name and email are not configured. '
107+
'Please use the "git config" command to configure them.\n\n'
108+
'\tgit config --global --add user.name "John Doe"\n'
109+
'\tgit config --global --add user.email '
110+
'"john.doe@example.com"\n'
111+
)
112+
113+
# Check the git configuration.
114+
if not name: # pragma: no cover
115+
raise errors.MissingUsername()
116+
if not email: # pragma: no cover
117+
raise errors.MissingEmail()
118+
119+
return cls(name=name, email=email)
120+
121+
def __attrs_post_init__(self):
122+
"""Finish object initialization."""
123+
# handle the case where ids were improperly set
124+
if self._id == 'mailto:None':
125+
self._id = self.default_id()
126+
76127

77128
@jsonld.s(
78129
type=[

tests/core/commands/test_dataset.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
import git
2626
import pytest
2727

28-
from renku.core.models.datasets import Creator, Dataset, DatasetFile
28+
from renku.core.models.datasets import Dataset, DatasetFile
29+
from renku.core.models.provenance.agents import Person
2930

3031

3132
def _key(client, dataset, filename):
@@ -140,7 +141,7 @@ def test_git_repo_import(client, dataset, tmpdir, data_repository):
140141

141142
@pytest.mark.parametrize(
142143
'creators', [
143-
[Creator(name='me', email='me@example.com')],
144+
[Person(name='me', email='me@example.com')],
144145
[{
145146
'name': 'me',
146147
'email': 'me@example.com',
@@ -150,14 +151,14 @@ def test_git_repo_import(client, dataset, tmpdir, data_repository):
150151
def test_creator_parse(creators, data_file):
151152
"""Test that different options for specifying creators work."""
152153
f = DatasetFile(path='file', creator=creators)
153-
creator = Creator(name='me', email='me@example.com')
154+
creator = Person(name='me', email='me@example.com')
154155
assert creator in f.creator
155156

156157
# email check
157158
with pytest.raises(ValueError):
158-
Creator(name='me', email='meexample.com')
159+
Person(name='me', email='meexample.com')
159160

160-
# creators must be a set or list of dicts or Creator
161+
# creators must be a set or list of dicts or Person
161162
with pytest.raises(ValueError):
162163
f = DatasetFile(path='file', creator=['name'])
163164

0 commit comments

Comments
 (0)