From ad7b4c83f343eac8c01a573c5aeda4048d762347 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 14 Oct 2021 18:36:00 +0200 Subject: [PATCH] fix(core): add missing Subject to dataverse export (#2420) --- renku/core/commands/providers/dataverse.py | 30 +++++++++++++ .../providers/dataverse_metadata_templates.py | 2 +- tests/cli/test_integration_datasets.py | 45 ++++++++++--------- tests/cli/test_save.py | 2 +- 4 files changed, 57 insertions(+), 22 deletions(-) diff --git a/renku/core/commands/providers/dataverse.py b/renku/core/commands/providers/dataverse.py index 78e5af4c9d..9ccc7dc788 100644 --- a/renku/core/commands/providers/dataverse.py +++ b/renku/core/commands/providers/dataverse.py @@ -34,6 +34,7 @@ from renku.core.commands.providers.doi import DOIProvider from renku.core.models.datasets import Dataset, DatasetFile, DatasetSchema from renku.core.models.provenance.agents import PersonSchema +from renku.core.utils import communication from renku.core.utils.doi import extract_doi, is_doi from renku.core.utils.requests import retry @@ -48,6 +49,24 @@ DATAVERSE_EXPORTER = "schema.org" +DATAVERSE_SUBJECTS = [ + "Agricultural Sciences", + "Arts and Humanities", + "Astronomy and Astrophysics", + "Business and Management", + "Chemistry", + "Computer and Information Science", + "Earth and Environmental Sciences", + "Engineering", + "Law", + "Mathematical Sciences", + "Medicine, Health and Life Sciences", + "Physics", + "Social Sciences", + "Other", +] + + class _DataverseDatasetSchema(DatasetSchema): """Schema for Dataverse datasets.""" @@ -411,15 +430,26 @@ def export(self, publish, **kwargs): def _get_dataset_metadata(self): authors, contacts = self._get_creators() + subject = self._get_subject() metadata_template = Template(DATASET_METADATA_TEMPLATE) metadata = metadata_template.substitute( name=_escape_json_string(self.dataset.title), authors=json.dumps(authors), contacts=json.dumps(contacts), description=_escape_json_string(self.dataset.description), + subject=subject, ) return json.loads(metadata) + def _get_subject(self): + text_prompt = "Subject of this dataset: \n\n" + text_prompt += "\n".join(f"{s}\t[{i}]" for i, s in enumerate(DATAVERSE_SUBJECTS, start=1)) + text_prompt += "\n\nSubject" + + selection = communication.prompt(text_prompt, type=int, default=len(DATAVERSE_SUBJECTS)) or 0 + + return DATAVERSE_SUBJECTS[selection - 1] + def _get_creators(self): authors = [] contacts = [] diff --git a/renku/core/commands/providers/dataverse_metadata_templates.py b/renku/core/commands/providers/dataverse_metadata_templates.py index 7e55dd5134..d6061ad132 100644 --- a/renku/core/commands/providers/dataverse_metadata_templates.py +++ b/renku/core/commands/providers/dataverse_metadata_templates.py @@ -56,7 +56,7 @@ "typeName": "dsDescription" }, { - "value": [], + "value": ["${subject}"], "typeClass": "controlledVocabulary", "multiple": true, "typeName": "subject" diff --git a/tests/cli/test_integration_datasets.py b/tests/cli/test_integration_datasets.py index a053714021..58335f004b 100644 --- a/tests/cli/test_integration_datasets.py +++ b/tests/cli/test_integration_datasets.py @@ -479,15 +479,16 @@ def test_renku_dataset_import_missing_lfs_objects(runner, project): @pytest.mark.integration @flaky(max_runs=10, min_passes=1) @pytest.mark.parametrize( - "provider,params,output", + "provider,params,output,input", [ - ("zenodo", [], "zenodo.org/deposit"), - ("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"), - ("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"), + ("zenodo", [], "zenodo.org/deposit", None), + ("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"), + ("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None), ], ) def test_dataset_export_upload_file( - runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output + runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output, + input, ): """Test successful uploading of a file to Zenodo/Dataverse deposit.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -511,7 +512,9 @@ def test_dataset_export_upload_file( data_repo.git.add(update=True) data_repo.index.commit("metadata updated") - result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params) + result = runner.invoke( + cli, ["dataset", "export", "my-dataset", provider] + params, input=input, catch_exceptions=False + ) assert 0 == result.exit_code, result.output + str(result.stderr_bytes) assert "Exported to:" in result.output @@ -521,15 +524,16 @@ def test_dataset_export_upload_file( @pytest.mark.integration @flaky(max_runs=10, min_passes=1) @pytest.mark.parametrize( - "provider,params,output", + "provider,params,output,input", [ - ("zenodo", [], "zenodo.org/deposit"), - ("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"), - ("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"), + ("zenodo", [], "zenodo.org/deposit", None), + ("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"), + ("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None), ], ) def test_dataset_export_upload_tag( - runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output + runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output, + input, ): """Test successful uploading of a file to Zenodo/Dataverse deposit.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -568,19 +572,19 @@ def test_dataset_export_upload_tag( result = runner.invoke(cli, ["dataset", "tag", "my-dataset", "2.0"]) assert 0 == result.exit_code, result.output + str(result.stderr_bytes) - result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "2.0"] + params) + result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "2.0"] + params, input=input) assert 0 == result.exit_code assert "Exported to:" in result.output assert output in result.output - result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "1.0"] + params) + result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider, "-t", "1.0"] + params, input=input) assert 0 == result.exit_code, result.output + str(result.stderr_bytes) assert "Exported to:" in result.output assert output in result.output - result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input="1") # HEAD + result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input=f"0\n{input}") # HEAD assert 0 == result.exit_code, result.output + str(result.stderr_bytes) assert "Exported to:" in result.output @@ -590,15 +594,16 @@ def test_dataset_export_upload_tag( @pytest.mark.integration @flaky(max_runs=10, min_passes=1) @pytest.mark.parametrize( - "provider,params,output", + "provider,params,output,input", [ - ("zenodo", [], "zenodo.org/deposit"), - ("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:"), - ("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/"), + ("zenodo", [], "zenodo.org/deposit", None), + ("dataverse", ["--dataverse-name", "sdsc-test-dataverse"], "doi:", "1"), + ("olos", ["--dlcm-server", "https://sandbox.dlcm.ch/"], "sandbox.dlcm.ch/ingestion/preingest/deposits/", None), ], ) def test_dataset_export_upload_multiple( - runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output + runner, project, tmpdir, client, zenodo_sandbox, dataverse_demo, olos_sandbox, provider, params, output, + input, ): """Test successful uploading of a files to Zenodo deposit.""" result = runner.invoke(cli, ["dataset", "create", "my-dataset"]) @@ -625,7 +630,7 @@ def test_dataset_export_upload_multiple( data_repo.git.add(update=True) data_repo.index.commit("metadata updated") - result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params) + result = runner.invoke(cli, ["dataset", "export", "my-dataset", provider] + params, input=input) assert 0 == result.exit_code, result.output + str(result.stderr_bytes) assert "Exported to:" in result.output diff --git a/tests/cli/test_save.py b/tests/cli/test_save.py index 69b59c6752..2ab42ed8ba 100644 --- a/tests/cli/test_save.py +++ b/tests/cli/test_save.py @@ -51,7 +51,7 @@ def test_save_with_remote(runner, project, client_with_remote, tmpdir_factory): fp.write("tracked file") result = runner.invoke(cli, ["save", "-m", "save changes", "tracked"], catch_exceptions=False) - breakpoint() + assert 0 == result.exit_code assert "tracked" in result.output assert "save changes" in client_with_remote.repo.head.commit.message