Skip to content
This repository has been archived by the owner on Jun 2, 2022. It is now read-only.

Commit

Permalink
PR updates
Browse files Browse the repository at this point in the history
  • Loading branch information
ehanson8 committed Sep 9, 2020
1 parent b2716f1 commit 20757a7
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 86 deletions.
26 changes: 11 additions & 15 deletions hoard/sources/whoas.py
Expand Up @@ -43,11 +43,10 @@ def create_from_whoas_dim_xml(data: str) -> Dataset:
contacts = [
Contact(
datasetContactName="NAME, FAKE",
datasetContactEmail="FAKE_EMAIL@FAKE_DOMAIN.EDU",
datasetContactEmail="FAKE_EMAIL@EXAMPLE.COM",
)
]
descriptions = []
subjects = []
distributors = []
grantNumbers = []
keywords = []
Expand All @@ -67,19 +66,14 @@ def create_from_whoas_dim_xml(data: str) -> Dataset:
authors.append(
Author(authorName=field.text, authorAffiliation="Woods Hole")
)
else:
authors.append(Author(authorName="", authorAffiliation=""))
if (
field.attrib["element"] == "description"
and "qualifier" in field.attrib
and field.attrib["qualifier"] == "abstract"
):
if field.text is not None:
descriptions.append(Description(dsDescriptionValue=field.text))
else:
descriptions.append(Description(dsDescriptionValue=""))
if field.attrib["element"] == "subject":
subjects.append(field.text)
keywords.append(Keyword(keywordValue=field.text))
if (
field.attrib["element"] == "identifier"
Expand Down Expand Up @@ -116,7 +110,7 @@ def create_from_whoas_dim_xml(data: str) -> Dataset:
if (
field.attrib["element"] == "coverage"
and "qualifier" in field.attrib
and field.attrib["qualifier"] == "spacial"
and field.attrib["qualifier"] == "spatial"
):
kwargs["productionPlace"] = field.text
if field.attrib["element"] == "relation" and "qualifier" not in field.attrib:
Expand All @@ -126,32 +120,34 @@ def create_from_whoas_dim_xml(data: str) -> Dataset:
and "qualifier" in field.attrib
and field.attrib["qualifier"] == "ispartof"
):
kwargs["series"] = Series(seriesInformation=field.text)
kwargs["series"] = Series(seriesName=field.text)
if (
field.attrib["element"] == "coverage"
and "qualifier" in field.attrib
and field.attrib["qualifier"] == "temporal"
):
if field.text is not None and " - " in field.text:
start = field.text[: field.text.index(" - ")]
end = field.text[field.text.index(" - ") + 3 : field.text.index("(UTC")]
dates = field.text.split(" - ")
start = dates[0]
end = dates[1].rstrip(" (UTC)")
timePeriodsCovered.append(
TimePeriodCovered(
timePeriodCoveredStart=start, timePeriodCoveredEnd=end,
)
)
if field.attrib["element"] == "rights" and "qualifier" not in field.attrib:
kwargs["license"] = field.text
kwargs["termsOfUse"] = field.text
if field.attrib["element"] == "rights" and "qualifier" not in field.attrib:
kwargs["license"] = field.text
kwargs["termsOfUse"] = field.text

kwargs["authors"] = authors
kwargs["contacts"] = contacts
kwargs["description"] = descriptions
kwargs["subjects"] = subjects
kwargs["subjects"] = ["Earth and Environmental Sciences"]
kwargs["distributors"] = distributors
kwargs["grantNumbers"] = grantNumbers
kwargs["keywords"] = keywords
kwargs["kindOfData"] = kindOfData
kwargs["otherIds"] = otherIds
kwargs["publications"] = publications
kwargs["timePeriodsCovered"] = timePeriodsCovered
return Dataset(**kwargs)
26 changes: 25 additions & 1 deletion tests/data/whoas/GetRecord_02.xml
Expand Up @@ -6,7 +6,7 @@
<GetRecord>
<record>
<header>
<identifier>oai:darchive.mblwhoilibrary.org:1912/2368</identifier>
<identifier>oai:darchive.mblwhoilibrary.org:1912/2367</identifier>
<datestamp>2016-09-26T17:42:49Z</datestamp>
<setSpec>com_1912_1726</setSpec>
<setSpec>com_1912_1725</setSpec>
Expand All @@ -15,10 +15,34 @@
</header>
<metadata>
<dim:dim xmlns:doc="http://www.lyncode.com/xoai" xmlns:dim="http://www.dspace.org/xmlns/dspace/dim" xsi:schemaLocation="http://www.dspace.org/xmlns/dspace/dim http://www.dspace.org/schema/dim.xsd">
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="3fe71a0a-2805-4c59-829e-cb9664896bfa" confidence="600">Beaulieu, Stace E.</dim:field>
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="3fe71a0a-2805-4c59-829e-cb9664896bfa" confidence="600">Brickley, Annette</dim:field>
<dim:field mdschema="dc" element="coverage" qualifier="temporal">2019-06-04 - 2019-06-04(UTC)</dim:field>
<dim:field mdschema="dc" element="date" qualifier="accessioned">2019-06-07T17:41:39Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="available">2019-06-07T17:41:39Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="issued">2019-06-04</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="uri">https://hdl.handle.net/1912/2368</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="doi">10.26025/8ke9-av98</dim:field>
<dim:field mdschema="dc" element="description" lang="en_US">This zipped file, “animals-on-the-move_ver1_vents_ver2.zip”, contains educational materials and datasets produced for spherical display systems. The zipped file contains 2 main folders: Animals_on_the_Move_ver1 as Version 1 of new content prepared for NOAA’s Science on a Sphere®, and vent_discoveries_pngs_updated_to_2016 as Version 2 of global datasets contained within Global Viewport to Deep‐Sea Vents: Dataset for Spherical Display Systems, DOI:10.1575/1912/6867. The unzipped contents are 785 MB, with a total of 100 files in 32 folders.</dim:field>
<dim:field mdschema="dc" element="description">This educational package, “animals-on-the-move_ver1_vents_ver2.zip”, is Copyright ©2019 Woods Hole Oceanographic Institution. Deep-sea vent discoveries and locations were derived from the InterRidge Vents Database Version 3.4 (https://vents-data.interridge.org/); thus, a subset of the materials in this package – specifically, the global datasets that have the filenames "vent_YYYY.png" in which YYYY is the year – is licensed under a Creative Commons Attribution‐Noncommercial‐ ShareAlike 3.0 Unported License (CC BY‐NC‐SA 3.0; https://creativecommons.org/licenses/by‐ncsa/ 3.0/). For images and mp4 files that we provided to accompany the Live Program, please see full credits and licensing in the Dataset Description document.</dim:field>
<dim:field mdschema="dc" element="description" qualifier="abstract" lang="en_US">This educational package was developed.</dim:field>
<dim:field mdschema="dc" element="description" qualifier="abstract" lang="en">Sample abstract</dim:field>
<dim:field mdschema="dc" element="description" qualifier="provenance" lang="en">Approved for entry into archive by Deborah Roth (droth@whoi.edu) on 2019-06-07T17:41:39Z (GMT) No. of bitstreams: 1
animals-on-the-move_ver1_vents_ver2.zip: 820943968 bytes, checksum: 0ba7e089cb52f2ad7c30f16f58020082 (MD5)</dim:field>
<dim:field mdschema="dc" element="description" qualifier="provenance" lang="en">Made available in DSpace on 2019-06-07T17:41:39Z (GMT). No. of bitstreams: 1
animals-on-the-move_ver1_vents_ver2.zip: 820943968 bytes, checksum: 0ba7e089cb52f2ad7c30f16f58020082 (MD5)
Previous issue date: 2019-06-04</dim:field>
<dim:field mdschema="dc" element="description" qualifier="sponsorship" lang="en_US">Funding for this educational package.</dim:field>
<dim:field mdschema="dc" element="publisher">Esteemed Publishing Conglomerate</dim:field>
<dim:field mdschema="dc" element="relation">Associated publication</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="ispartof">https://hdl.handle.net/1912/6867</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="uri">https://doi.org/10.26025/8ke9-av98</dim:field>
<dim:field mdschema="dc" element="rights">Attribution 4.0 International</dim:field>
<dim:field mdschema="dc" element="rights" qualifier="uri">http://creativecommons.org/licenses/by/4.0/</dim:field>
<dim:field mdschema="dc" element="subject" lang="en_US">Migration</dim:field>
<dim:field mdschema="dc" element="subject" lang="en_US">Larval dispersal</dim:field>
<dim:field mdschema="dc" element="title" lang="en_US">Animals on the Move and Deep‐Sea Vents: Dataset for Spherical Display Systems</dim:field>
<dim:field mdschema="dc" element="type" lang="en_US">Dataset</dim:field>
</dim:dim>
</metadata>
</record>
Expand Down
17 changes: 4 additions & 13 deletions tests/test_source.py → tests/test_jpal.py
@@ -1,8 +1,7 @@
import requests_mock
from unittest.mock import MagicMock

from hoard.sources.jpal import JPAL
from hoard.sources.whoas import WHOAS
from hoard.sources.jpal import create_from_dataverse_json, JPAL


def test_jpal_returns_datasets(
Expand All @@ -20,14 +19,6 @@ def test_jpal_returns_datasets(
assert next(jpal) == dataset


def test_whoas_returns_datasets(dspace_oai_xml_records):
oai_client = MagicMock()
oai_client.__next__.return_value = next(iter(dspace_oai_xml_records))
with requests_mock.Mocker() as m:
m.get(
"http+mock://example.com/oai", text=dspace_oai_xml_records[0],
)
whoas = WHOAS(oai_client)
assert (
next(whoas).title == "The Title"
) # Not sure how deep we want to go with the testing
def test_create_dataset_from_dataverse_json(dataverse_minimal_json_record):
    """Round-trip check: building a Dataset from Dataverse JSON and
    serializing it back with asdict() reproduces the original record."""
    built = create_from_dataverse_json(dataverse_minimal_json_record)
    assert built.asdict() == dataverse_minimal_json_record
57 changes: 0 additions & 57 deletions tests/test_models.py
@@ -1,6 +1,5 @@
import json

from hoard.sources.jpal import create_from_dataverse_json
from hoard.models import (
Author,
Contact,
Expand All @@ -16,7 +15,6 @@
Series,
TimePeriodCovered,
)
from hoard.sources.whoas import create_from_whoas_dim_xml


def test_minimal_dataset(dataverse_minimal_json_record):
Expand Down Expand Up @@ -116,58 +114,3 @@ def test_full_dataset(dataverse_full_json_record):
assert json.dumps(new_record.asdict(), sort_keys=True) == json.dumps(
dataverse_full_json_record, sort_keys=True
)


def test_create_dataset_from_dataverse_json(dataverse_minimal_json_record):
dataset = create_from_dataverse_json(dataverse_minimal_json_record)
assert dataset.asdict() == dataverse_minimal_json_record


def test_create_whoas_dim_xml(whoas_oai_server):
title = (
"Animals on the Move and Deep‐Sea Vents: Dataset for Spherical Display Systems"
)
authors = [
Author(
authorName="Beaulieu, Stace E.",
authorAffiliation="Woods Hole",
authorIdentifierScheme=None,
authorIdentifier=None,
),
Author(
authorName="Brickley, Annette",
authorAffiliation="Woods Hole",
authorIdentifierScheme=None,
authorIdentifier=None,
),
]
contacts = [
Contact(
datasetContactName="NAME, FAKE",
datasetContactEmail="FAKE_EMAIL@FAKE_DOMAIN.EDU",
)
]
description = [
Description(
dsDescriptionValue="This educational package was developed.",
dsDescriptionDate=None,
),
Description(dsDescriptionValue="Sample abstract", dsDescriptionDate=None,),
]
subjects = [
"Migration",
"Larval dispersal",
]
dataset = create_from_whoas_dim_xml(whoas_oai_server[0])
assert dataset.title == title
assert dataset.authors == authors
assert dataset.contacts == contacts
assert dataset.description == description
assert dataset.subjects == subjects

dataset = create_from_whoas_dim_xml(whoas_oai_server[1])
assert dataset.title == title
assert dataset.authors == []
assert dataset.contacts == contacts
assert dataset.description == []
assert dataset.subjects == []
106 changes: 106 additions & 0 deletions tests/test_whoas.py
@@ -0,0 +1,106 @@
import requests_mock
from unittest.mock import MagicMock

from hoard.models import (
Author,
Contact,
Description,
Distributor,
GrantNumber,
Keyword,
OtherId,
Publication,
Series,
TimePeriodCovered,
)
from hoard.sources.whoas import create_from_whoas_dim_xml, WHOAS


def test_create_whoas_required_dim_xml(whoas_oai_server):
    """Parse the canned WHOAS DIM records and verify every mapped field.

    Record 0 is checked against the required fields only; record 1 is
    checked against the full set of optional fields as well.
    """
    expected_title = (
        "Animals on the Move and Deep‐Sea Vents: Dataset for Spherical Display Systems"
    )
    # Both authors share the same affiliation, so build them from the names.
    expected_authors = [
        Author(
            authorName=name,
            authorAffiliation="Woods Hole",
            authorIdentifierScheme=None,
            authorIdentifier=None,
        )
        for name in ("Beaulieu, Stace E.", "Brickley, Annette")
    ]
    expected_contacts = [
        Contact(
            datasetContactName="NAME, FAKE",
            datasetContactEmail="FAKE_EMAIL@EXAMPLE.COM",
        )
    ]
    expected_description = [
        Description(dsDescriptionValue=text, dsDescriptionDate=None)
        for text in (
            "This educational package was developed.",
            "Sample abstract",
        )
    ]
    expected_distributors = [
        Distributor(distributorName="Esteemed Publishing Conglomerate")
    ]
    # The sponsorship text is mapped to both the value and the agency.
    expected_grantNumbers = [
        GrantNumber(
            grantNumberValue="Funding for this educational package.",
            grantNumberAgency="Funding for this educational package.",
        )
    ]
    expected_keywords = [
        Keyword(keywordValue=value) for value in ("Migration", "Larval dispersal")
    ]
    expected_otherIds = [
        OtherId(otherIdValue=value, otherIdAgency=None)
        for value in ("https://hdl.handle.net/1912/2368", "10.26025/8ke9-av98")
    ]
    expected_publications = [
        Publication(publicationCitation="Associated publication")
    ]
    expected_series = Series(seriesName="https://hdl.handle.net/1912/6867")

    expected_timePeriodsCovered = [
        TimePeriodCovered(
            timePeriodCoveredStart="2019-06-04",
            timePeriodCoveredEnd="2019-06-04",
        )
    ]
    expected_subjects = ["Earth and Environmental Sciences"]

    # Record 0: required fields only.
    dataset = create_from_whoas_dim_xml(whoas_oai_server[0])
    assert dataset.title == expected_title
    assert dataset.authors == expected_authors
    assert dataset.contacts == expected_contacts
    assert dataset.description == expected_description
    assert dataset.subjects == expected_subjects

    # Record 1: required fields plus every optional mapping.
    dataset = create_from_whoas_dim_xml(whoas_oai_server[1])
    assert dataset.title == expected_title
    assert dataset.authors == expected_authors
    assert dataset.contacts == expected_contacts
    assert dataset.description == expected_description
    assert dataset.subjects == expected_subjects
    assert dataset.distributors == expected_distributors
    assert dataset.grantNumbers == expected_grantNumbers
    assert dataset.keywords == expected_keywords
    assert dataset.otherIds == expected_otherIds
    assert dataset.publications == expected_publications
    assert dataset.series == expected_series
    assert dataset.timePeriodsCovered == expected_timePeriodsCovered


def test_whoas_returns_datasets(dspace_oai_xml_records):
    """Iterating a WHOAS source yields parsed Dataset objects."""
    # Stub OAI client whose iteration yields the first canned DSpace record.
    stub_client = MagicMock()
    stub_client.__next__.return_value = next(iter(dspace_oai_xml_records))
    with requests_mock.Mocker() as mocker:
        mocker.get("http+mock://example.com/oai", text=dspace_oai_xml_records[0])
        source = WHOAS(stub_client)
        # Only the title is checked here; field-level parsing is covered
        # by test_create_whoas_required_dim_xml.
        assert next(source).title == "The Title"

0 comments on commit 20757a7

Please sign in to comment.