fix(core): add missing Subject to dataverse export (#2420)
Panaetius committed Nov 17, 2021
1 parent 42e2c4e commit ad7b4c8
Showing 4 changed files with 57 additions and 22 deletions.
30 changes: 30 additions & 0 deletions renku/core/commands/providers/dataverse.py
@@ -34,6 +34,7 @@
from renku.core.commands.providers.doi import DOIProvider
from renku.core.models.datasets import Dataset, DatasetFile, DatasetSchema
from renku.core.models.provenance.agents import PersonSchema
from renku.core.utils import communication
from renku.core.utils.doi import extract_doi, is_doi
from renku.core.utils.requests import retry

@@ -48,6 +49,24 @@
DATAVERSE_EXPORTER = "schema.org"


DATAVERSE_SUBJECTS = [
"Agricultural Sciences",
"Arts and Humanities",
"Astronomy and Astrophysics",
"Business and Management",
"Chemistry",
"Computer and Information Science",
"Earth and Environmental Sciences",
"Engineering",
"Law",
"Mathematical Sciences",
"Medicine, Health and Life Sciences",
"Physics",
"Social Sciences",
"Other",
]


class _DataverseDatasetSchema(DatasetSchema):
"""Schema for Dataverse datasets."""

@@ -411,15 +430,26 @@ def export(self, publish, **kwargs):

def _get_dataset_metadata(self):
authors, contacts = self._get_creators()
subject = self._get_subject()
metadata_template = Template(DATASET_METADATA_TEMPLATE)
metadata = metadata_template.substitute(
name=_escape_json_string(self.dataset.title),
authors=json.dumps(authors),
contacts=json.dumps(contacts),
description=_escape_json_string(self.dataset.description),
subject=subject,
)
return json.loads(metadata)

def _get_subject(self):
text_prompt = "Subject of this dataset: \n\n"
text_prompt += "\n".join(f"{s}\t[{i}]" for i, s in enumerate(DATAVERSE_SUBJECTS, start=1))
text_prompt += "\n\nSubject"

selection = communication.prompt(text_prompt, type=int, default=len(DATAVERSE_SUBJECTS)) or 0

return DATAVERSE_SUBJECTS[selection - 1]

def _get_creators(self):
authors = []
contacts = []
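Note on the selection logic above: _get_subject builds a 1-based menu, defaults to the last entry, and maps the answer back with selection - 1, so an empty reply (or an explicit 0) resolves to "Other". A minimal standalone sketch of that arithmetic follows; ask() is a hypothetical stand-in for communication.prompt, not renku's API, and the list is shortened.

# Standalone sketch of the 1-based subject selection in _get_subject.
# ask() is a hypothetical stand-in for communication.prompt, not renku's API.
SUBJECTS = ["Agricultural Sciences", "Arts and Humanities", "Other"]  # shortened list

def ask(text, default):
    raw = input(f"{text} [{default}]: ").strip()
    return int(raw) if raw else default

def get_subject():
    options = "\n".join(f"{s}\t[{i}]" for i, s in enumerate(SUBJECTS, start=1))
    selection = ask("Subject of this dataset:\n\n" + options + "\n\nSubject", default=len(SUBJECTS)) or 0
    # An empty reply (the default) and an explicit 0 both land on the last
    # entry, "Other"; entering 1 picks the first entry, which is what the
    # integration tests below feed in for the dataverse provider.
    return SUBJECTS[selection - 1]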
@@ -56,7 +56,7 @@
"typeName": "dsDescription"
},
{
"value": [],
"value": ["${subject}"],
"typeClass": "controlledVocabulary",
"multiple": true,
"typeName": "subject"
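The ${subject} placeholder above is filled by _get_dataset_metadata, which runs the template through substitute() and parses the result as JSON. A trimmed sketch of that round trip, assuming Template is Python's string.Template and keeping only the subject fragment of the template:

import json
from string import Template

# Trimmed stand-in for DATASET_METADATA_TEMPLATE; only the subject field is shown.
TEMPLATE_FRAGMENT = """
{
    "value": ["${subject}"],
    "typeClass": "controlledVocabulary",
    "multiple": true,
    "typeName": "subject"
}
"""

rendered = Template(TEMPLATE_FRAGMENT).substitute(subject="Computer and Information Science")
print(json.loads(rendered))
# {'value': ['Computer and Information Science'], 'typeClass': 'controlledVocabulary',
#  'multiple': True, 'typeName': 'subject'}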
45 changes: 25 additions & 20 deletions tests/cli/test_integration_datasets.py
@@ -479,15 +479,16 @@ def test_renku_dataset_import_missing_lfs_objects(runner, project):
@pytest.mark.integration
@flaky(max_runs=10, min_passes=1)
@pytest.mark.parametrize(
"provider,params,output",
"provider,params,output,input",
[
("zenodo", [], "zenodo.org/deposit"),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"),
("zenodo", [], "zenodo.org/deposit", None),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None),
],
)
def test_dataset_export_upload_file(
runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output
runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output,
input,
):
"""Test successful uploading of a file to Zenodo/Dataverse deposit."""
result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
@@ -511,7 +512,9 @@ def test_dataset_export_upload_file(
data_repo.git.add(update=True)
data_repo.index.commit("metadata updated")

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params)
result = runner.invoke(
cli, ["dataset", "export", "my-dataset", provider] + params, input=input, catch_exceptions=False
)

assert 0 == result.exit_code, result.output + str(result.stderr_bytes)
assert "Exported to:" in result.output
@@ -521,15 +524,16 @@
@pytest.mark.integration
@flaky(max_runs=10, min_passes=1)
@pytest.mark.parametrize(
"provider,params,output",
"provider,params,output,input",
[
("zenodo", [], "zenodo.org/deposit"),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"),
("zenodo", [], "zenodo.org/deposit", None),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None),
],
)
def test_dataset_export_upload_tag(
runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output
runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output,
input,
):
"""Test successful uploading of a file to Zenodo/Dataverse deposit."""
result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
@@ -568,19 +572,19 @@ def test_dataset_export_upload_tag(
result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "2.0"])
assert 0 == result.exit_code, result.output + str(result.stderr_bytes)

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "2.0"] + params)
result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "2.0"] + params, input=input)

assert 0 == result.exit_code
assert "Exported to:" in result.output
assert output in result.output

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "1.0"] + params)
result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "1.0"] + params, input=input)

assert 0 == result.exit_code, result.output + str(result.stderr_bytes)
assert "Exported to:" in result.output
assert output in result.output

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input="1") # HEAD
result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input=f"0\n{input}") # HEAD

assert 0 == result.exit_code, result.output + str(result.stderr_bytes)
assert "Exported to:" in result.output
@@ -590,15 +594,16 @@
@pytest.mark.integration
@flaky(max_runs=10, min_passes=1)
@pytest.mark.parametrize(
"provider,params,output",
"provider,params,output,input",
[
("zenodo", [], "zenodo.org/deposit"),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"),
("zenodo", [], "zenodo.org/deposit", None),
("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"),
("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None),
],
)
def test_dataset_export_upload_multiple(
runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output
runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output,
input,
):
"""Test successful uploading of a files to Zenodo deposit."""
result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
@@ -625,7 +630,7 @@ def test_dataset_export_upload_multiple(
data_repo.git.add(update=True)
data_repo.index.commit("metadata updated")

result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params)
result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input=input)

assert 0 == result.exit_code, result.output + str(result.stderr_bytes)
assert "Exported to:" in result.output
2 changes: 1 addition & 1 deletion tests/cli/test_save.py
@@ -51,7 +51,7 @@ def test_save_with_remote(runner, project, client_with_remote, tmpdir_factory):
fp.write("tracked file")

result = runner.invoke(cli, ["save", "-m", "save changes", "tracked"], catch_exceptions=False)
breakpoint()

assert 0 == result.exit_code
assert "tracked" in result.output
assert "save changes" in client_with_remote.repo.head.commit.message
