Skip to content
This repository has been archived by the owner on Jun 2, 2022. It is now read-only.

Commit

Permalink
Merge pull request #35 from MITLibraries/time-format
Browse files Browse the repository at this point in the history
date validation
  • Loading branch information
ehanson8 committed Oct 7, 2020
2 parents 065538b + 656a078 commit e530580
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 4 deletions.
23 changes: 19 additions & 4 deletions hoard/sources/whoas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
from typing import Any, Dict, Iterator
from urllib.parse import urlparse

Expand Down Expand Up @@ -96,8 +97,13 @@ def create_from_whoas_dim_xml(data: str, client: OAIClient) -> Dataset:
field.attrib["element"] == "date"
and "qualifier" in field.attrib
and field.attrib["qualifier"] == "issued"
and field.text is not None
):
kwargs["distributionDate"] = field.text
try:
datetime.strptime(field.text, "%Y-%m-%d")
kwargs["distributionDate"] = field.text
except ValueError:
pass
if field.attrib["element"] == "publisher":
kwargs.setdefault("distributors", []).append(
Distributor(distributorName=field.text)
Expand Down Expand Up @@ -159,10 +165,19 @@ def create_from_whoas_dim_xml(data: str, client: OAIClient) -> Dataset:
dates = field.text.split(" - ")
start = dates[0]
end = dates[1].rstrip(" (UTC)")
time_kwargs = {}
try:
datetime.strptime(start, "%Y-%m-%d")
time_kwargs["timePeriodCoveredStart"] = start
except ValueError:
pass
try:
datetime.strptime(end, "%Y-%m-%d")
time_kwargs["timePeriodCoveredEnd"] = end
except ValueError:
pass
kwargs.setdefault("timePeriodsCovered", []).append(
TimePeriodCovered(
timePeriodCoveredStart=start, timePeriodCoveredEnd=end,
)
TimePeriodCovered(**time_kwargs)
)
if field.attrib["element"] == "rights" and "qualifier" not in field.attrib:
kwargs["license"] = field.text
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ def whoas_oai_server(requests_mock, shared_datadir, request):
"oai:darchive.mblwhoilibrary.org:1912/2371": (
shared_datadir / "whoas/GetRecord_05.xml"
).read_text(),
"oai:darchive.mblwhoilibrary.org:1912/2372": (
shared_datadir / "whoas/GetRecord_06.xml"
).read_text(),
}
requests_mock.get(
f"{url}?verb=ListIdentifiers",
Expand All @@ -119,6 +122,7 @@ def whoas_oai_server(requests_mock, shared_datadir, request):
records["oai:darchive.mblwhoilibrary.org:1912/2369"],
records["oai:darchive.mblwhoilibrary.org:1912/2370"],
records["oai:darchive.mblwhoilibrary.org:1912/2371"],
records["oai:darchive.mblwhoilibrary.org:1912/2372"],
]


Expand Down
50 changes: 50 additions & 0 deletions tests/data/whoas/GetRecord_06.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="static/style.xsl"?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
<responseDate>2020-08-12T18:18:43Z</responseDate>
<request verb="GetRecord" identifier="oai:darchive.mblwhoilibrary.org:1912/2372" metadataPrefix="oai_dc">https://darchive.mblwhoilibrary.org/oai/request</request>
<GetRecord>
<record>
<header>
<identifier>oai:darchive.mblwhoilibrary.org:1912/2372</identifier>
<datestamp>2016-09-26T17:42:49Z</datestamp>
<setSpec>com_1912_1726</setSpec>
<setSpec>com_1912_1725</setSpec>
<setSpec>com_1912_4</setSpec>
<setSpec>col_1912_2364</setSpec>
</header>
<metadata>
<dim:dim xmlns:doc="http://www.lyncode.com/xoai" xmlns:dim="http://www.dspace.org/xmlns/dspace/dim" xsi:schemaLocation="http://www.dspace.org/xmlns/dspace/dim http://www.dspace.org/schema/dim.xsd">
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="3fe71a0a-2805-4c59-829e-cb9664896bfa" confidence="600">Beaulieu, Stace E.</dim:field>
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="3fe71a0a-2805-4c59-829e-cb9664896bfa" confidence="600">Brickley, Annette</dim:field>
<dim:field mdschema="dc" element="coverage" qualifier="temporal">2019-06-04 - present</dim:field>
<dim:field mdschema="dc" element="date" qualifier="accessioned">2019-06-07T17:41:39Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="available">2019-06-07T17:41:39Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="issued">2019-06-04</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="uri">https://hdl.handle.net/1912/2372</dim:field>
<dim:field mdschema="dc" element="description" lang="en_US">This zipped file contains educational materials.</dim:field>
<dim:field mdschema="dc" element="description">This educational package is Copyright ©2019 Woods Hole Oceanographic Institution.</dim:field>
<dim:field mdschema="dc" element="description" qualifier="abstract" lang="en_US">This educational package was developed.</dim:field>
<dim:field mdschema="dc" element="description" qualifier="abstract" lang="en">Sample abstract</dim:field>
<dim:field mdschema="dc" element="description" qualifier="provenance" lang="en">Approved for entry into archive by Deborah Roth (droth@whoi.edu) on 2019-06-07T17:41:39Z (GMT) No. of bitstreams: 1
animals-on-the-move_ver1_vents_ver2.zip: 820943968 bytes, checksum: 0ba7e089cb52f2ad7c30f16f58020082 (MD5)</dim:field>
<dim:field mdschema="dc" element="description" qualifier="provenance" lang="en">Made available in DSpace on 2019-06-07T17:41:39Z (GMT). No. of bitstreams: 1
animals-on-the-move_ver1_vents_ver2.zip: 820943968 bytes, checksum: 0ba7e089cb52f2ad7c30f16f58020082 (MD5)
Previous issue date: 2019-06-04</dim:field>
<dim:field mdschema="dc" element="description" qualifier="sponsorship" lang="en_US">Funding for this educational package.</dim:field>
<dim:field mdschema="dc" element="publisher">Esteemed Publishing Conglomerate</dim:field>
<dim:field mdschema="dc" element="language" qualifier="iso">en_US</dim:field>
<dim:field mdschema="dc" element="relation">Associated publication</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="ispartof">https://hdl.handle.net/1912/6867</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="uri">https://doi.org/10.26025/8ke9-av98</dim:field>
<dim:field mdschema="dc" element="rights">Attribution 4.0 International</dim:field>
<dim:field mdschema="dc" element="rights" qualifier="uri">http://creativecommons.org/licenses/by/4.0/</dim:field>
<dim:field mdschema="dc" element="subject" lang="en_US">Migration</dim:field>
<dim:field mdschema="dc" element="subject" lang="en_US">Larval dispersal</dim:field>
<dim:field mdschema="dc" element="title" lang="en_US">Animals on the Move and Deep‐Sea Vents: Dataset for Spherical Display Systems</dim:field>
<dim:field mdschema="dc" element="type" lang="en_US">Dataset</dim:field>
</dim:dim>
</metadata>
</record>
</GetRecord>
</OAI-PMH>
31 changes: 31 additions & 0 deletions tests/test_whoas.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ def test_create_whoas_dim_xml(whoas_oai_server, dspace_oai_xml_series_name_recor
),
OtherId(otherIdValue="10.26025/8ke9-av98", otherIdAgency=None),
]
otherIds_2 = [
OtherId(otherIdValue="https://hdl.handle.net/1912/2372", otherIdAgency=None)
]
publications = [Publication(publicationCitation="Associated publication")]
series = Series(
seriesName="Series Title",
Expand All @@ -90,6 +93,12 @@ def test_create_whoas_dim_xml(whoas_oai_server, dspace_oai_xml_series_name_recor
timePeriodCoveredStart="2019-06-04", timePeriodCoveredEnd="2019-06-04",
)
]

partial_timePeriodsCovered = [
TimePeriodCovered(timePeriodCoveredStart="2019-06-04")
]

# minimal record
subjects = ["Earth and Environmental Sciences"]
dataset = create_from_whoas_dim_xml(whoas_oai_server[0], client)
assert dataset.title == title
Expand All @@ -98,6 +107,7 @@ def test_create_whoas_dim_xml(whoas_oai_server, dspace_oai_xml_series_name_recor
assert dataset.description == description
assert dataset.subjects == subjects

# full record
dataset = create_from_whoas_dim_xml(whoas_oai_server[1], client)
for _k, v in dataset.__dict__.items():
assert v != []
Expand All @@ -118,6 +128,27 @@ def test_create_whoas_dim_xml(whoas_oai_server, dspace_oai_xml_series_name_recor
assert dataset.license == "Attribution 4.0 International"
assert dataset.termsOfUse == "Attribution 4.0 International"

# record with invalid date
dataset = create_from_whoas_dim_xml(whoas_oai_server[5], client)
for _k, v in dataset.__dict__.items():
assert v != []
assert dataset.title == title
assert dataset.authors == authors
assert dataset.contacts == contacts
assert dataset.description == description
assert dataset.subjects == subjects
assert dataset.distributors == distributors
assert dataset.grantNumbers == grantNumbers
assert dataset.keywords == keywords
assert dataset.language == ["English"]
assert dataset.notesText == notesText
assert dataset.otherIds == otherIds_2
assert dataset.publications == publications
assert dataset.series == series
assert dataset.timePeriodsCovered == partial_timePeriodsCovered
assert dataset.license == "Attribution 4.0 International"
assert dataset.termsOfUse == "Attribution 4.0 International"


def test_whoas_returns_datasets(whoas_oai_server):
oai_client = MagicMock()
Expand Down

0 comments on commit e530580

Please sign in to comment.