Skip to content

Commit 9f0f9a1

Browse files
Panaetius authored and jsam committed
feat: dataverse import (#626)
* feat: add DataverseProvider for importing datasets
* feat: add support for arbitrary dataverse repos
* Removes leftover url_lower variable
* adds error handling for http errors; moves provider detection to individual providers; fixes dataset file property casing; improves doi uri sanitizing
* Adds unit tests
* Moves unit tests to integration tests
* Fix formatting
* Fixes bug where provider resolution depended on the output order of dict.items()
* Removes is_doi variable and adds dynamic message for supported providers
* Fixes minor formatting and style issues for PR
* Fixes import sorting
1 parent e960a98 commit 9f0f9a1

File tree

10 files changed

+585
-80
lines changed

10 files changed

+585
-80
lines changed

conftest.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,14 @@
1717
# limitations under the License.
1818
"""Pytest configuration."""
1919

20+
import json
2021
import os
22+
import pathlib
23+
import re
2124
import shutil
2225
import tempfile
2326
import time
27+
import urllib
2428

2529
import pytest
2630
import responses
@@ -299,6 +303,84 @@ def zenodo_sandbox(client):
299303
)
300304

301305

306+
@pytest.fixture
def doi_responses():
    """Mock HTTP responses for DOI resolution and Dataverse version checks.

    Yields a ``responses.RequestsMock`` with two ``GET`` callbacks registered:

    * every URL below ``DOI_BASE_URL`` answers with citation metadata as
      JSON; its ``URL`` field points at a Zenodo record when the requested
      URL contains ``'zenodo'`` and at a Dataverse citation page otherwise;
    * the version endpoint of ``https://dataverse.harvard.edu`` answers with
      a fixed version/build payload.

    ``assert_all_requests_are_fired`` is disabled so that a test may use only
    a subset of the mocked endpoints.
    """
    # Imported explicitly: ``pathlib.posixpath`` is not a public attribute
    # of ``pathlib`` and ``import urllib`` alone does not guarantee that the
    # ``urllib.parse`` submodule is loaded.
    import posixpath
    from urllib.parse import urlparse, urlunparse

    from renku.cli._providers.doi import DOI_BASE_URL
    from renku.cli._providers.dataverse import (
        DATAVERSE_API_PATH, DATAVERSE_VERSION_API
    )

    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:

        def doi_callback(request):
            """Return fake citation metadata for the requested DOI URL."""
            response_url = (
                'https://dataverse.harvard.edu/citation'
                '?persistentId=doi:10.11588/data/yyxx1122'
            )
            if 'zenodo' in request.url:
                response_url = 'https://zenodo.org/record/3363060'
            return (
                200, {
                    'Content-Type': 'application/json'
                },
                json.dumps({
                    'type': 'dataset',
                    'id': request.url,
                    'author': [{
                        'family': 'Doe',
                        'given': 'John'
                    }],
                    'contributor': [{
                        'contributorType': 'ContactPerson',
                        'family': 'Doe',
                        'given': 'John'
                    }],
                    'issued': {
                        'date-parts': [[2019]]
                    },
                    'abstract': 'Test Dataset',
                    'DOI': '10.11588/data/yyxx1122',
                    'publisher': 'heiDATA',
                    'title': 'dataset',
                    'URL': response_url
                })
            )

        # Escape the base URL so that '.' only matches a literal dot.
        rsps.add_callback(
            method='GET',
            url=re.compile(
                '{base_url}/.*'.format(base_url=re.escape(DOI_BASE_URL))
            ),
            callback=doi_callback
        )

        def version_callback(request):
            """Return a fixed Dataverse version payload."""
            return (
                200, {
                    'Content-Type': 'application/json'
                },
                json.dumps({
                    'status': 'OK',
                    'data': {
                        'version': '4.1.3',
                        'build': 'abcdefg'
                    }
                })
            )

        base_url = 'https://dataverse.harvard.edu'

        # Swap the path component (index 2) for the version API path.
        url_parts = list(urlparse(base_url))
        url_parts[2] = posixpath.join(
            DATAVERSE_API_PATH, DATAVERSE_VERSION_API
        )
        pattern = '{url}.*'.format(url=re.escape(urlunparse(url_parts)))

        rsps.add_callback(
            method='GET', url=re.compile(pattern), callback=version_callback
        )
        yield rsps
382+
383+
302384
@pytest.fixture
303385
def cli(client, run):
304386
"""Return a callable Renku CLI.

renku/cli/_providers/__init__.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,35 +19,54 @@
1919
from urllib.parse import urlparse
2020

2121
from renku.cli._providers.zenodo import ZenodoProvider
22+
from renku.cli._providers.dataverse import DataverseProvider
2223
from renku.utils.doi import is_doi
2324

2425

2526
class ProviderFactory:
2627
"""Create a provider type from URI."""
2728

28-
PROVIDERS = {'zenodo': ZenodoProvider}
29+
PROVIDERS = {'dataverse': DataverseProvider, 'zenodo': ZenodoProvider}
2930

3031
@staticmethod
def from_uri(uri):
    """Get provider type based on uri.

    Each class registered in ``ProviderFactory.PROVIDERS`` is asked, in
    turn, whether it supports ``uri``; the first one that does is used.

    :param uri: DOI or URL identifying the dataset.
    :returns: a ``(provider, message)`` tuple. ``provider`` is an instance
        of the matching provider class, or ``None`` when the URI cannot be
        parsed or no provider supports it. ``message`` describes the
        failure, or holds the warnings collected while probing providers
        (an empty string when there were none).
    """
    is_doi_ = is_doi(uri)
    # Truthiness covers both a ``None`` and a ``False`` "not a DOI" result.
    if not is_doi_:
        url = urlparse(uri)
        if not (url.scheme and url.netloc and url.params == ''):
            return None, 'Cannot parse URL.'

    provider = None
    warning = ''

    for potential_provider in ProviderFactory.PROVIDERS.values():
        try:
            if potential_provider.supports(uri):
                provider = potential_provider
                break
        except Exception as e:
            # A failing provider check must not prevent the remaining
            # providers from being tried; KeyboardInterrupt and SystemExit
            # are deliberately left to propagate.
            warning += 'Couldn\'t test provider {prov}: {err}\n'.format(
                prov=potential_provider, err=e
            )

    if provider is not None:
        return provider(is_doi=is_doi_), warning

    supported_providers = ', '.join(ProviderFactory.PROVIDERS)

    if is_doi_:
        return None, (
            warning + 'Provider {} not found. '.format(
                uri.split('/')[1].split('.')[0]  # Get DOI provider name.
            ) + 'Currently supporting following providers: {}'.
            format(supported_providers)
        )

    return None, (
        warning + 'Provider not found for {}. '.format(uri) +
        'Currently supporting following providers: {}'.
        format(supported_providers)
    )
5170

5271
@staticmethod
5372
def from_id(provider_id):

renku/cli/_providers/api.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ def get_exporter(self, dataset, secret):
3030
"""Get export manager."""
3131
pass
3232

33+
@staticmethod
@abc.abstractmethod
def supports(uri):
    """Tell whether this provider can handle the given ``uri``.

    Abstract hook: this base implementation has no logic of its own and
    returns ``None``.

    :param uri: DOI or URL to check.
    """
38+
3339

3440
class ExporterApi(abc.ABC):
3541
"""Interface defining exporter methods."""

0 commit comments

Comments
 (0)