Skip to content
This repository has been archived by the owner on Jun 2, 2022. It is now read-only.

Commit

Permalink
Filter empty/None optional values out of Dataset asdict() return value
Browse files Browse the repository at this point in the history
Updates the helper functions to return None for empty or None values,
and filters the final fields list to remove the None entries.
  • Loading branch information
hakbailey committed Sep 28, 2020
1 parent 58d8c8a commit dfcc253
Show file tree
Hide file tree
Showing 4 changed files with 370 additions and 104 deletions.
220 changes: 116 additions & 104 deletions hoard/models.py
@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Union

import attr

Expand Down Expand Up @@ -105,23 +105,24 @@ class Dataset:
termsOfUse: Optional[str] = None

def asdict(self) -> dict:
fields = [primitive(self.title, "title")]
if self.alternativeURL is not None:
fields.append(primitive(self.alternativeURL, "alternativeURL"))
authors = compound(
self.authors,
"author",
[
"authorName",
"authorAffiliation",
"authorIdentifierScheme",
"authorIdentifier",
],
fields: List[Optional[Dict[str, Any]]] = [primitive(self.title, "title")]

fields.append(primitive(self.alternativeURL, "alternativeURL"))

fields.append(
compound(
self.authors,
"author",
[
"authorName",
"authorAffiliation",
"authorIdentifierScheme",
"authorIdentifier",
],
controlled_subfield="authorIdentifierScheme",
)
)
for author in authors["value"]:
if "authorIdentifierScheme" in author:
author["authorIdentifierScheme"]["typeClass"] = "controlledVocabulary"
fields.append(authors)

fields.append(
compound(
self.contacts,
Expand All @@ -133,67 +134,59 @@ def asdict(self) -> dict:
],
)
)
if self.contributors is not None:
contributors = compound(

fields.append(
compound(
self.contributors,
"contributor",
["contributorName", "contributorType"],
controlled_subfield="contributorType",
)
for contributor in contributors["value"]:
contributor["contributorType"]["typeClass"] = "controlledVocabulary"
fields.append(contributors)
)

fields.append(
compound(
self.description,
"dsDescription",
["dsDescriptionValue", "dsDescriptionDate"],
)
)
if self.distributionDate is not None:
fields.append(primitive(self.distributionDate, "distributionDate"))
if self.distributors is not None:
fields.append(
compound(
self.distributors,
"distributor",
["distributorName", "distributorURL"],
)
)
if self.grantNumbers is not None:
fields.append(
compound(
self.grantNumbers,
"grantNumber",
["grantNumberValue", "grantNumberAgency"],
)
)
if self.keywords is not None:
fields.append(compound(self.keywords, "keyword", ["keywordValue"]))
if self.kindOfData is not None:
fields.append(
{
"typeName": "kindOfData",
"multiple": True,
"typeClass": "primitive",
"value": self.kindOfData,
}
)
if self.language is not None:
fields.append(controlled(self.language, "language"))
if self.notesText is not None:
fields.append(primitive(self.notesText, "notesText"))
if self.otherIds is not None:
fields.append(
compound(self.otherIds, "otherId", ["otherIdValue", "otherIdAgency"],)

fields.append(primitive(self.distributionDate, "distributionDate"))

fields.append(
compound(
self.distributors, "distributor", ["distributorName", "distributorURL"],
)
if self.producers is not None:
fields.append(
compound(self.producers, "producer", ["producerName", "producerURL"],)
)

fields.append(
compound(
self.grantNumbers,
"grantNumber",
["grantNumberValue", "grantNumberAgency"],
)
if self.productionPlace is not None:
fields.append(primitive(self.productionPlace, "productionPlace"))
if self.publications is not None:
publications = compound(
)
fields.append(compound(self.keywords, "keyword", ["keywordValue"]))

fields.append(primitive(self.kindOfData, "kindOfData", multiple=True))

fields.append(controlled(self.language, "language"))

fields.append(primitive(self.notesText, "notesText"))

fields.append(
compound(self.otherIds, "otherId", ["otherIdValue", "otherIdAgency"],)
)

fields.append(
compound(self.producers, "producer", ["producerName", "producerURL"],)
)

fields.append(primitive(self.productionPlace, "productionPlace"))

fields.append(
compound(
self.publications,
"publication",
[
Expand All @@ -202,80 +195,95 @@ def asdict(self) -> dict:
"publicationIDType",
"publicationURL",
],
controlled_subfield="publicationIDType",
)
for publication in [
p for p in publications["value"] if "publicationIDType" in p
]:
publication["publicationIDType"]["typeClass"] = "controlledVocabulary"
fields.append(publications)
)

if self.series is not None:
fields.append(
{
"typeName": "series",
"multiple": False,
"typeClass": "compound",
"value": {
"seriesName": {
"multiple": False,
"typeClass": "primitive",
"typeName": "seriesName",
"value": self.series.seriesName,
},
"seriesInformation": {
"multiple": False,
"typeClass": "primitive",
"typeName": "seriesInformation",
"value": self.series.seriesInformation,
},
},
}
)
fields.append(controlled(self.subjects, "subject"))
if self.timePeriodsCovered is not None:
fields.append(
compound(
self.timePeriodsCovered,
"timePeriodCovered",
["timePeriodCoveredStart", "timePeriodCoveredEnd"],
series: Dict[str, Any] = {
"typeName": "series",
"multiple": False,
"typeClass": "compound",
"value": {},
}
if self.series.seriesName:
series["value"]["seriesName"] = primitive(
self.series.seriesName, "seriesName"
)
if self.series.seriesInformation:
series["value"]["seriesInformation"] = primitive(
self.series.seriesInformation, "seriesInformation"
)
fields.append(series)

fields.append(controlled(self.subjects, "subject"))

fields.append(
compound(
self.timePeriodsCovered,
"timePeriodCovered",
["timePeriodCoveredStart", "timePeriodCoveredEnd"],
)
)

result: Dict[str, Any] = {
"datasetVersion": {
"metadataBlocks": {
"citation": {"displayName": "Citation Metadata", "fields": fields}
"citation": {
"displayName": "Citation Metadata",
"fields": list(filter(None, fields)),
}
}
}
}

if self.license is not None:
result["datasetVersion"]["license"] = self.license
if self.termsOfUse is not None:
result["datasetVersion"]["termsOfUse"] = self.termsOfUse

return result


# Dataverse metadata block types


def compound(values: List, type_name: str, subtype_names: List[str]) -> dict:
def compound(
values: Optional[List],
type_name: str,
subtype_names: List[str],
controlled_subfield: Optional[str] = None,
) -> Optional[dict]:
if not values:
return None
result: Dict[str, Any] = {
"value": [],
"typeClass": "compound",
"multiple": True,
"typeName": type_name,
}
for v in values:
for i, v in enumerate(values):
result["value"].append(
{
subtype: primitive(getattr(v, subtype), subtype)
for subtype in subtype_names
if getattr(v, subtype) is not None
}
)
if (
controlled_subfield is not None
and controlled_subfield in result["value"][i]
):
result["value"][i][controlled_subfield][
"typeClass"
] = "controlledVocabulary"

return result


def controlled(values: List[str], type_name: str) -> dict:
def controlled(values: Optional[List[str]], type_name: str) -> Optional[dict]:
if not values:
return None
result = {
"value": values,
"typeClass": "controlledVocabulary",
Expand All @@ -286,11 +294,15 @@ def controlled(values: List[str], type_name: str) -> dict:
return result


def primitive(value: str, type_name: str) -> dict:
def primitive(
value: Optional[Union[List, str]], type_name: str, multiple: bool = False
) -> Optional[dict]:
if not value:
return None
result = {
"value": value,
"typeClass": "primitive",
"multiple": False,
"multiple": multiple,
"typeName": type_name,
}

Expand Down
7 changes: 7 additions & 0 deletions tests/conftest.py
Expand Up @@ -35,6 +35,13 @@ def dataverse_minimal_json_record(shared_datadir):
return r


@pytest.fixture
def dataverse_partial_json_record(shared_datadir):
    """Return the parsed JSON content of ``partial_fields.json``.

    The file is looked up inside the directory supplied by the
    ``shared_datadir`` fixture, read as text, and decoded with ``json.loads``.
    """
    json_path = shared_datadir / "partial_fields.json"
    return json.loads(json_path.read_text())


@pytest.fixture
def dataverse_full_json_record(shared_datadir):
f = (shared_datadir / "all_fields.json").read_text()
Expand Down

0 comments on commit dfcc253

Please sign in to comment.