Skip to content

Commit

Permalink
fix(core): add missing Subject to dataverse export (#2420)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Panaetius committed Oct 14, 2021
1 parent 6a56312 commit 942941c
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 18 deletions.
30 changes: 30 additions & 0 deletions renku/core/commands/providers/dataverse.py
Expand Up @@ -44,6 +44,7 @@
from renku.core.metadata.immutable import DynamicProxy
from renku.core.models.dataset import DatasetFile
from renku.core.models.provenance.agent import PersonSchema
from renku.core.utils import communication
from renku.core.utils.doi import extract_doi, is_doi
from renku.core.utils.file_size import bytes_to_unit
from renku.core.utils.git import get_content
Expand All @@ -58,6 +59,24 @@
DATAVERSE_EXPORTER = "schema.org"


# Subject choices presented to the user when exporting a dataset to Dataverse.
# The chosen entry is substituted into the "subject" field of the dataset
# metadata template. Order matters: the prompt numbers entries from 1 and
# uses the last entry ("Other") as its default.
DATAVERSE_SUBJECTS = [
    "Agricultural Sciences",
    "Arts and Humanities",
    "Astronomy and Astrophysics",
    "Business and Management",
    "Chemistry",
    "Computer and Information Science",
    "Earth and Environmental Sciences",
    "Engineering",
    "Law",
    "Mathematical Sciences",
    "Medicine, Health and Life Sciences",
    "Physics",
    "Social Sciences",
    "Other",
]


class _DataverseDatasetSchema(ProviderDatasetSchema):
"""Schema for Dataverse datasets."""

Expand Down Expand Up @@ -438,15 +457,26 @@ def export(self, publish, client=None, **kwargs):

def _get_dataset_metadata(self):
    """Build the dataset metadata payload from the metadata template.

    Collects the creators and asks the user for a subject, fills the
    placeholders of ``DATASET_METADATA_TEMPLATE`` with those values plus the
    dataset's title and description, and returns the rendered text parsed
    as JSON.
    """
    # Creators are resolved before the subject so the user prompt issued by
    # ``_get_subject`` happens at the same point as before.
    author_entries, contact_entries = self._get_creators()
    chosen_subject = self._get_subject()

    template = Template(DATASET_METADATA_TEMPLATE)
    placeholders = {
        "name": _escape_json_string(self.dataset.title),
        # Authors and contacts are structured values, so they are embedded
        # as serialized JSON rather than as escaped strings.
        "authors": json.dumps(author_entries),
        "contacts": json.dumps(contact_entries),
        "description": _escape_json_string(self.dataset.description),
        "subject": chosen_subject,
    }
    rendered = template.substitute(**placeholders)

    return json.loads(rendered)

def _get_subject(self):
    """Ask the user which Dataverse subject applies to the dataset.

    The entries of ``DATAVERSE_SUBJECTS`` are listed numbered from 1 and the
    user is prompted for a number; the last entry is offered as the default.

    Returns:
        The selected entry of ``DATAVERSE_SUBJECTS``. An empty or zero
        answer resolves to the last entry, as before.

    A number outside ``1..len(DATAVERSE_SUBJECTS)`` is rejected and the user
    is asked again. Previously a too-large number raised ``IndexError`` and a
    negative number silently selected an unrelated subject through negative
    indexing.
    """
    subject_count = len(DATAVERSE_SUBJECTS)

    text_prompt = "Subject of this dataset: \n\n"
    text_prompt += "\n".join(f"{s}\t[{i}]" for i, s in enumerate(DATAVERSE_SUBJECTS, start=1))
    text_prompt += "\n\nSubject"

    while True:
        # A falsy answer (``None`` or ``0``) falls back to the default,
        # which is the last entry of the list.
        selection = communication.prompt(text_prompt, type=int, default=subject_count) or subject_count

        if 1 <= selection <= subject_count:
            return DATAVERSE_SUBJECTS[selection - 1]
        # Out-of-range answer: ask again; the prompt text lists the valid numbers.

def _get_creators(self):
authors = []
contacts = []
Expand Down
Expand Up @@ -56,7 +56,7 @@
"typeName": "dsDescription"
},
{
"value": [],
"value": ["${subject}"],
"typeClass": "controlledVocabulary",
"multiple": true,
"typeName": "subject"
Expand Down
39 changes: 22 additions & 17 deletions tests/cli/test_integration_datasets.py
Expand Up @@ -480,11 +480,11 @@ def test_renku_dataset_import_missing_lfs_objects(runner, project):
@pytest.mark.integration
@retry_failed
@pytest.mark.parametrize(
"provider,params,output",
"provider,params,output,input",
[
("zenodo", [], "zenodo.org/deposit"),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"),
("zenodo", [], "zenodo.org/deposit", None),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None),
],
)
def test_dataset_export_upload_file(
Expand All @@ -497,6 +497,7 @@ def test_dataset_export_upload_file(
provider,
params,
output,
input,
client_database_injection_manager,
):
"""Test successful uploading of a file to Zenodo/Dataverse deposit."""
Expand All @@ -521,7 +522,9 @@ def test_dataset_export_upload_file(
client.repo.git.add(all=True)
client.repo.index.commit("metadata updated")

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, catch_exceptions=False)
result = runner.invoke(
cli, ["dataset", "export", "my-dataset", provider] + params, input=input, catch_exceptions=False
)

assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)
assert "Exported to:" in result.output
Expand All @@ -531,11 +534,11 @@ def test_dataset_export_upload_file(
@pytest.mark.integration
@retry_failed
@pytest.mark.parametrize(
"provider,params,output",
"provider,params,output,input",
[
("zenodo", [], "zenodo.org/deposit"),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"),
("zenodo", [], "zenodo.org/deposit", None),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None),
],
)
def test_dataset_export_upload_tag(
Expand All @@ -548,6 +551,7 @@ def test_dataset_export_upload_tag(
provider,
params,
output,
input,
client_database_injection_manager,
):
"""Test successful uploading of a file to Zenodo/Dataverse deposit."""
Expand Down Expand Up @@ -587,19 +591,19 @@ def test_dataset_export_upload_tag(
result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "2.0"])
assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "2.0"] + params)
result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "2.0"] + params, input=input)

assert 0 == result.exit_code, format_result_exception(result)
assert "Exported to:" in result.output
assert output in result.output

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "1.0"] + params)
result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "1.0"] + params, input=input)

assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)
assert "Exported to:" in result.output
assert output in result.output

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input="0") # HEAD
result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input=f"0\n{input}") # HEAD

assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)
assert "Exported to:" in result.output
Expand All @@ -609,11 +613,11 @@ def test_dataset_export_upload_tag(
@pytest.mark.integration
@retry_failed
@pytest.mark.parametrize(
"provider,params,output",
"provider,params,output,input",
[
("zenodo", [], "zenodo.org/deposit"),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"),
("zenodo", [], "zenodo.org/deposit", None),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None),
],
)
def test_dataset_export_upload_multiple(
Expand All @@ -626,6 +630,7 @@ def test_dataset_export_upload_multiple(
provider,
params,
output,
input,
client_database_injection_manager,
):
"""Test successful uploading of a files to Zenodo deposit."""
Expand Down Expand Up @@ -653,7 +658,7 @@ def test_dataset_export_upload_multiple(
client.repo.git.add(all=True)
client.repo.index.commit("metadata updated")

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params)
result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input=input)

assert 0 == result.exit_code, format_result_exception(result) + str(result.stderr_bytes)
assert "Exported to:" in result.output
Expand Down

0 comments on commit 942941c

Please sign in to comment.