Skip to content

Commit fc6fd4f

Browse files
authored
feat(datasets): export dataset to zenodo (#529)
1 parent ae67be7 commit fc6fd4f

File tree

10 files changed

+728
-134
lines changed

10 files changed

+728
-134
lines changed

conftest.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,3 +253,13 @@ def add_client(doctest_namespace):
253253
"""Add Renku client to doctest namespace."""
254254
from renku.api import LocalClient
255255
doctest_namespace['client'] = LocalClient(path=tempfile.mkdtemp())
256+
257+
258+
@pytest.fixture
def zenodo_sandbox(client):
    """Configure the environment to use the Zenodo sandbox for one test.

    Sets ``ZENODO_USE_SANDBOX`` in the process environment and stores a
    Zenodo access token in the client's configuration through
    ``client.set_value``.  When the test finishes, the environment variable
    is put back to whatever it was before, so the sandbox switch does not
    leak into tests that did not request this fixture.

    The token is taken from the ``ZENODO_ACCESS_TOKEN`` environment variable
    when it is set; otherwise the token committed with this fixture is used.
    """
    # NOTE(review): a credential is committed in source below.  It is kept
    # only as a fallback so existing test runs behave as before; prefer
    # supplying ZENODO_ACCESS_TOKEN from the CI secret store and rotating
    # this value.
    access_token = os.environ.get(
        'ZENODO_ACCESS_TOKEN',
        'HPwXfABPZ7JNiwXMrktL7pevuuo9jt4gsUCkh3Gs2apg65ixa3JPyFukdGup'
    )

    previous = os.environ.get('ZENODO_USE_SANDBOX')
    os.environ['ZENODO_USE_SANDBOX'] = 'true'
    try:
        client.set_value('zenodo', 'access_token', access_token)
        yield
    finally:
        # Restore the pre-test state of the variable (unset or old value).
        if previous is None:
            os.environ.pop('ZENODO_USE_SANDBOX', None)
        else:
            os.environ['ZENODO_USE_SANDBOX'] = previous

renku/api/config.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,15 @@ def load_config(self):
8282
def store_config(self, config):
    """Persist the global configuration object and return it re-read.

    The file at ``self.config_path`` is created if missing and truncated
    before writing, so a shorter configuration never leaves stale bytes
    from a previous, longer one.

    :param config: object exposing ``write(file)``, which is called with
        the opened configuration file.
    :returns: the result of ``self.load_config()`` after the write.
    """
    # Clear the umask only while the file is opened, so the 0o600 mode is
    # applied unmasked if the file has to be created, then restore it so
    # the rest of the process keeps its original umask.
    previous_umask = os.umask(0)
    try:
        fd = os.open(
            str(self.config_path),
            os.O_CREAT | os.O_RDWR | os.O_TRUNC,
            0o600,
        )
    finally:
        os.umask(previous_umask)

    with open(fd, 'w+') as file:
        config.write(file)

    return self.load_config()
93+
8994
def get_value(self, section, key):
9095
"""Get value from specified section and key."""
9196
config = self.load_config()
@@ -99,7 +104,20 @@ def set_value(self, section, key, value):
99104
else:
100105
config[section] = {key: value}
101106

102-
self.store_config(config)
107+
config = self.store_config(config)
108+
return config
109+
110+
def remove_value(self, section, key):
    """Remove ``key`` from ``section`` of the stored configuration.

    Removal is idempotent: a missing section or a missing key is not an
    error.  A section left without any keys is dropped as well.  The
    configuration is written back through ``self.store_config`` in every
    case.

    :param section: name of the configuration section.
    :param key: name of the key to remove from that section.
    :returns: the value returned by ``self.store_config``.
    """
    config = self.load_config()

    if section in config:
        # A default makes a second removal of the same key a no-op
        # instead of a KeyError.
        config[section].pop(key, None)

        if not config[section].keys():
            config.pop(section)

    return self.store_config(config)
104122

105123

renku/cli/_providers/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
class ProviderFactory:
2626
"""Create a provider type from URI."""
2727

28+
PROVIDERS = {'zenodo': ZenodoProvider}
29+
2830
@staticmethod
2931
def from_uri(uri):
3032
"""Get provider type based on uri."""
@@ -46,3 +48,8 @@ def from_uri(uri):
4648
)
4749

4850
return provider, None
51+
52+
@staticmethod
def from_id(provider_id):
    """Instantiate the provider registered under ``provider_id``.

    The identifier is looked up in ``ProviderFactory.PROVIDERS`` and the
    registered class is called without arguments; an identifier that is
    not registered raises ``KeyError``.
    """
    provider_class = ProviderFactory.PROVIDERS[provider_id]
    return provider_class()

renku/cli/_providers/api.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,27 @@ class ProviderApi(abc.ABC):
2424
def find_record(self, uri):
2525
"""Find record by uri."""
2626
pass
27+
28+
@abc.abstractmethod
def get_exporter(self, dataset, secret):
    """Return the export manager for ``dataset``.

    Abstract: concrete providers override this.  ``secret`` is the
    credential handed to the exporter.
    """
32+
33+
34+
class ExporterApi(abc.ABC):
    """Abstract interface every dataset exporter has to implement."""

    @abc.abstractmethod
    def set_access_token(self, access_token):
        """Store the access token the exporter should use."""

    @abc.abstractmethod
    def access_token_url(self):
        """Return the endpoint where an access token can be created."""

    @abc.abstractmethod
    def export(self, publish):
        """Run the export process; ``publish`` is passed by the caller."""

renku/cli/_providers/doi.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,7 @@ def find_record(self, uri):
9797
"""Finds DOI record."""
9898
response = self._query(uri).json()
9999
return DOIProvider._serialize(response)
100+
101+
def get_exporter(self, dataset, secret):
    """Satisfy the ``ProviderApi`` interface.

    No exporter is created for this provider: both arguments are ignored
    and ``None`` is returned.
    """
    return None

renku/cli/_providers/zenodo.py

Lines changed: 240 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,30 +16,64 @@
1616
# See the License for the specific language governing permissions and
1717
# limitations under the License.
1818
"""Zenodo API integration."""
19+
import json
20+
import os
1921
import pathlib
2022
import urllib
2123
from urllib.parse import urlparse
2224

2325
import attr
2426
import requests
27+
from requests import HTTPError
28+
from tqdm import tqdm
2529

26-
from renku.cli._providers.api import ProviderApi
30+
from renku._compat import Path
31+
from renku.cli._providers.api import ExporterApi, ProviderApi
2732
from renku.cli._providers.doi import DOIProvider
2833
from renku.models.datasets import Dataset, DatasetFile
2934
from renku.utils.doi import is_doi
3035

3136
# Service roots: production and sandbox.
ZENODO_BASE_URL = 'https://zenodo.org'
ZENODO_SANDBOX_URL = 'https://sandbox.zenodo.org/'

# First path segment of every REST API URL.
ZENODO_API_PATH = 'api'

# Path segments for the deposit area and for published records.
ZENODO_DEPOSIT_PATH = 'deposit'
ZENODO_PUBLISH_PATH = 'record'

# Deposition endpoints, relative to the deposit path; ``{0}`` is filled
# with the deposition id.
ZENODO_PUBLISH_ACTION_PATH = 'depositions/{0}/actions/publish'
ZENODO_METADATA_URL = 'depositions/{0}'
ZENODO_FILES_URL = 'depositions/{0}/files'
ZENODO_NEW_DEPOSIT_URL = 'depositions'
3348

3449

3550
def make_records_url(record_id):
    """Create the REST API URL for accessing a record by its ID.

    :param record_id: record identifier; converted with ``str`` so both
        string and integer ids are accepted.
    :returns: ``ZENODO_BASE_URL`` joined with ``<api path>/records/<id>``.
    """
    # Import posixpath directly: ``pathlib.posixpath`` is not a documented
    # attribute of pathlib, only a by-product of its internal imports.
    import posixpath

    record_path = posixpath.join(ZENODO_API_PATH, 'records', str(record_id))
    return urllib.parse.urljoin(ZENODO_BASE_URL, record_path)
4156

4257

58+
def check_or_raise(response):
    """Check for an expected response status code.

    Status codes 200, 201 and 202 are accepted and the function returns
    ``None``.  Anything else raises ``HTTPError`` with the response
    attached to the exception:

    * 401 - fixed message asking for a new access token;
    * 400 - one line per entry of the ``errors`` list in the JSON body,
      when such a list is present;
    * otherwise (including a 400 without usable error details) - the raw
      response content.

    :param response: object with ``status_code``, ``json()`` and
        ``content``, as returned by ``requests``.
    :raises HTTPError: for every status code other than 200, 201, 202.
    """
    status = response.status_code
    if status in (200, 201, 202):
        return

    if status == 401:
        raise HTTPError(
            'Access unauthorized - update access token.', response=response
        )

    if status == 400:
        # The body is not guaranteed to be JSON or to carry field-level
        # errors; never let parsing problems hide the HTTP failure itself.
        try:
            payload = response.json()
        except ValueError:
            payload = None

        details = payload.get('errors') if isinstance(payload, dict) else None
        errors = [
            '"{0}" failed with "{1}"'.format(
                err.get('field'), err.get('message')
            ) for err in (details or []) if isinstance(err, dict)
        ]

        if errors:
            raise HTTPError('\n' + '\n'.join(errors), response=response)

    raise HTTPError(response.content, response=response)
75+
76+
4377
@attr.s
4478
class ZenodoFileSerializer:
4579
"""Zenodo record file."""
@@ -224,6 +258,205 @@ def as_dataset(self):
224258
return dataset
225259

226260

261+
@attr.s
class ZenodoDeposition:
    """Zenodo record for deposit.

    Creating an instance immediately sends a request that creates a new
    deposition (see ``__attrs_post_init__``) and stores the ``id`` from the
    response on the instance.  All requests take their base URL, query
    parameters and headers from ``exporter``.
    """

    # Object providing ``zenodo_url``, ``default_params`` and ``HEADERS``.
    exporter = attr.ib()
    # Deposition identifier; always replaced by the id of the deposition
    # created in ``__attrs_post_init__``.
    id = attr.ib(default=None)

    def _url(self, *segments):
        """Join path ``segments`` and resolve them against the Zenodo URL."""
        # Import posixpath directly: ``pathlib.posixpath`` is not a
        # documented attribute of pathlib.
        import posixpath

        return urllib.parse.urljoin(
            self.exporter.zenodo_url, posixpath.join(*segments)
        )

    def _deposit_api_url(self, endpoint):
        """Return the REST API URL of ``endpoint`` below the deposit path."""
        return self._url(ZENODO_API_PATH, ZENODO_DEPOSIT_PATH, endpoint)

    @property
    def publish_url(self):
        """Returns publish URL."""
        return self._deposit_api_url(
            ZENODO_PUBLISH_ACTION_PATH.format(self.id)
        )

    @property
    def attach_metadata_url(self):
        """Return URL for attaching metadata."""
        return self._deposit_api_url(ZENODO_METADATA_URL.format(self.id))

    @property
    def upload_file_url(self):
        """Return URL for uploading file."""
        return self._deposit_api_url(ZENODO_FILES_URL.format(self.id))

    @property
    def new_deposit_url(self):
        """Return URL for creating new deposit."""
        return self._deposit_api_url(ZENODO_NEW_DEPOSIT_URL)

    @property
    def published_at(self):
        """Return published at URL (``<zenodo>/record/<id>``)."""
        return self._url(ZENODO_PUBLISH_PATH, str(self.id))

    @property
    def deposit_at(self):
        """Return deposit at URL (``<zenodo>/deposit/<id>``)."""
        return self._url(ZENODO_DEPOSIT_PATH, str(self.id))

    def new_deposition(self):
        """Create new deposition on Zenodo.

        :returns: the response of the POST request.
        :raises HTTPError: via ``check_or_raise`` on an unexpected status.
        """
        response = requests.post(
            url=self.new_deposit_url,
            params=self.exporter.default_params,
            json={},
            headers=self.exporter.HEADERS
        )
        check_or_raise(response)

        return response

    def upload_file(self, filepath):
        """Upload and attach a file to existing deposition on Zenodo.

        :param filepath: path of the file to upload; its base name is sent
            as the ``filename`` form field.
        :returns: the response of the POST request.
        :raises HTTPError: via ``check_or_raise`` on an unexpected status.
        """
        request_payload = {'filename': Path(filepath).name}

        # Keep the file open only for the duration of the request so the
        # handle is released even when the upload fails.
        with open(str(filepath), 'rb') as stream:
            response = requests.post(
                url=self.upload_file_url,
                params=self.exporter.default_params,
                data=request_payload,
                files={'file': stream},
            )
        check_or_raise(response)

        return response

    def attach_metadata(self, dataset):
        """Attach metadata to deposition on Zenodo.

        Sends the dataset's name, description and creators (name and
        affiliation of each) with ``upload_type`` set to ``dataset``.

        :returns: the response of the PUT request.
        :raises HTTPError: via ``check_or_raise`` on an unexpected status.
        """
        request_payload = {
            'metadata': {
                'title': dataset.name,
                'upload_type': 'dataset',
                'description': dataset.description,
                'creators': [{
                    'name': creator.name,
                    'affiliation': creator.affiliation
                } for creator in dataset.creator]
            }
        }

        response = requests.put(
            url=self.attach_metadata_url,
            params=self.exporter.default_params,
            data=json.dumps(request_payload),
            headers=self.exporter.HEADERS
        )
        check_or_raise(response)

        return response

    def publish_deposition(self, secret):
        """Publish existing deposition.

        ``secret`` is accepted for interface compatibility but not used:
        the request is authenticated through the exporter's default
        parameters.

        :returns: the response of the POST request.
        :raises HTTPError: via ``check_or_raise`` on an unexpected status.
        """
        response = requests.post(
            url=self.publish_url, params=self.exporter.default_params
        )
        check_or_raise(response)

        return response

    def __attrs_post_init__(self):
        """Post-Init hook: create the deposition and store its id."""
        response = self.new_deposition()
        self.id = response.json()['id']
398+
399+
400+
@attr.s
class ZenodoExporter(ExporterApi):
    """Zenodo export manager."""

    # Headers sent with JSON requests.
    HEADERS = {'Content-Type': 'application/json'}

    # Dataset to export; ``files`` and ``asjsonld()`` are read from it.
    dataset = attr.ib()
    # Token sent as the ``access_token`` query parameter of every request.
    access_token = attr.ib()

    @property
    def zenodo_url(self):
        """Returns correct Zenodo URL based on environment.

        The sandbox URL is returned when ``ZENODO_USE_SANDBOX`` is set to
        anything other than an empty value or an explicit negative
        (``0``, ``false``, ``no``, ``off``; case-insensitive), so that
        ``ZENODO_USE_SANDBOX=false`` really selects production.
        """
        flag = os.environ.get('ZENODO_USE_SANDBOX', '').strip().lower()
        if flag in ('', '0', 'false', 'no', 'off'):
            return ZENODO_BASE_URL

        return ZENODO_SANDBOX_URL

    def set_access_token(self, access_token):
        """Set access token."""
        self.access_token = access_token

    def access_token_url(self):
        """Return endpoint for creation of access token.

        The endpoint is resolved against ``zenodo_url``, so the link
        points at the same service (production or sandbox) that the
        export requests are sent to.
        """
        return urllib.parse.urljoin(
            self.zenodo_url, 'account/settings/applications/tokens/new/'
        )

    @property
    def default_params(self):
        """Create request default params."""
        return {'access_token': self.access_token}

    def dataset_to_request(self):
        """Prepare dataset metadata for request.

        :returns: the result of ``dataset.asjsonld()`` with ``upload_type``
            set to ``dataset``.
        """
        jsonld = self.dataset.asjsonld()
        jsonld['upload_type'] = 'dataset'
        return jsonld

    def export(self, publish):
        """Execute entire export process.

        :param publish: when true the deposition is published after upload.
        :returns: the published-record URL when ``publish`` is true,
            otherwise the URL of the deposit.
        """
        # Step 1. Create new deposition
        deposition = ZenodoDeposition(exporter=self)

        # Step 2. Upload all files to created deposition
        files = self.dataset.files
        with tqdm(total=len(files)) as progressbar:
            for file_ in files:
                deposition.upload_file(file_.full_path)
                progressbar.update(1)

        # Step 3. Attach metadata to deposition
        deposition.attach_metadata(self.dataset)

        # Step 4. Publish newly created deposition
        if publish:
            deposition.publish_deposition(self.access_token)
            return deposition.published_at

        return deposition.deposit_at
458+
459+
227460
@attr.s
228461
class ZenodoProvider(ProviderApi):
229462
"""zenodo.org registry API provider."""
@@ -278,3 +511,7 @@ def get_record(self, uri):
278511
response = self.make_request(uri)
279512

280513
return ZenodoRecordSerializer(**response.json(), zenodo=self, uri=uri)
514+
515+
def get_exporter(self, dataset, access_token):
    """Build the ``ZenodoExporter`` for ``dataset`` using ``access_token``."""
    exporter = ZenodoExporter(dataset=dataset, access_token=access_token)
    return exporter

0 commit comments

Comments
 (0)