Skip to content

Commit

Permalink
Update Springshare testing and convention following
Browse files Browse the repository at this point in the history
Why these changes are being introduced:
* Updates stemming from code review for PR #99

How this addresses that need:
* updated test naming
* additional tests for OaiDc get_dates() and get_links() hooks
* fallback on default citation generator
* ensure usage of str(<BS4_element>.string) for memory concerns

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/TIMX-227
  • Loading branch information
ghukill committed Jul 28, 2023
1 parent 1c95fe0 commit 982f569
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 76 deletions.
12 changes: 12 additions & 0 deletions tests/fixtures/oai_dc/springshare/springshare_invalid_dates.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<records>
<record xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:date>200000000-01-01</dc:date>
</record>
<record xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:date>Not found</dc:date>
</record>
<record xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:date></dc:date>
</record>
<record xmlns:dc="http://purl.org/dc/elements/1.1/"></record>
</records>
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<records>
<record xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header>
<identifier>oai:libguides.com:guides/175846</identifier>
<datestamp>2023-05-31T19:49:21Z</datestamp>
<setSpec>guides</setSpec>
</header>
<metadata>
<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:creator>Ye Li</dc:creator>
<dc:subject>Engineering</dc:subject>
<dc:subject>Science</dc:subject>
<dc:description>Useful databases and other research tips for materials science.</dc:description>
<dc:publisher>MIT Libraries</dc:publisher>
<dc:date>2008-06-19T17:55:27</dc:date>
</oai_dc:dc>
</metadata>
</record>
</records>
8 changes: 8 additions & 0 deletions tests/fixtures/oai_dc/springshare/springshare_valid_dates.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<records>
<record xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:date>2000-01-01</dc:date>
</record>
<record xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:date>January 1st, 2000</dc:date>
</record>
</records>
16 changes: 8 additions & 8 deletions tests/springshare/test_libguides.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
FIXTURES_PREFIX = "tests/fixtures/oai_dc/springshare/libguides"

BLANK_OR_MISSING_OPTIONAL_FIELDS_TIMDEX = timdex.TimdexRecord(
source="Libguides",
source="LibGuides",
source_link="https://libguides.mit.edu/materials",
timdex_record_id="libguides:materials",
title="Materials Science & Engineering",
citation="Materials Science & Engineering. Libguides. "
citation="Materials Science & Engineering. libguides. "
"https://libguides.mit.edu/materials",
content_type=["libguides"],
format="electronic resource",
Expand All @@ -19,8 +19,8 @@
links=[
timdex.Link(
url="https://libguides.mit.edu/materials",
kind="Libguide URL",
text="Libguide URL",
kind="LibGuide URL",
text="LibGuide URL",
)
],
)
Expand All @@ -32,11 +32,11 @@ def test_libguide_transform_with_all_fields_transforms_correctly():
)
output_records = SpringshareOaiDc("libguides", input_records)
assert next(output_records) == timdex.TimdexRecord(
source="Libguides",
source="LibGuides",
source_link="https://libguides.mit.edu/materials",
timdex_record_id="libguides:materials",
title="Materials Science & Engineering",
citation="Materials Science & Engineering. Libguides. "
citation="Ye Li. Materials Science & Engineering. MIT Libraries. libguides. "
"https://libguides.mit.edu/materials",
content_type=["libguides"],
contributors=[
Expand All @@ -55,8 +55,8 @@ def test_libguide_transform_with_all_fields_transforms_correctly():
links=[
timdex.Link(
url="https://libguides.mit.edu/materials",
kind="Libguide URL",
text="Libguide URL",
kind="LibGuide URL",
text="LibGuide URL",
)
],
publication_information=["MIT Libraries"],
Expand Down
8 changes: 4 additions & 4 deletions tests/springshare/test_research_databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
source_link="https://libguides.mit.edu/llba",
timdex_record_id="researchdatabases:llba",
title="Linguistics and Language Behavior Abstracts (LLBA)",
citation="Linguistics and Language Behavior Abstracts (LLBA). Research "
"Databases. https://libguides.mit.edu/llba",
citation="Linguistics and Language Behavior Abstracts (LLBA). researchdatabases. "
"https://libguides.mit.edu/llba",
content_type=["researchdatabases"],
format="electronic resource",
identifiers=[
Expand All @@ -36,8 +36,8 @@ def test_libguide_transform_with_all_fields_transforms_correctly():
source_link="https://libguides.mit.edu/llba",
timdex_record_id="researchdatabases:llba",
title="Linguistics and Language Behavior Abstracts (LLBA)",
citation="Linguistics and Language Behavior Abstracts (LLBA). Research "
"Databases. https://libguides.mit.edu/llba",
citation="Linguistics and Language Behavior Abstracts (LLBA). "
"researchdatabases. https://libguides.mit.edu/llba",
content_type=["researchdatabases"],
dates=[
timdex.Date(value="2022-01-28T22:15:37"),
Expand Down
55 changes: 55 additions & 0 deletions tests/springshare/test_springshare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import transmogrifier.models as timdex
from transmogrifier.helpers import parse_xml_records
from transmogrifier.sources.springshare import SpringshareOaiDc

# Directory prefix for the Springshare XML fixture files loaded by the tests below
FIXTURES_PREFIX = "tests/fixtures/oai_dc/springshare"


def test_springshare_get_dates_valid():
    """
    Test that each record in the valid-dates fixture parses to the expected date
    """
    fixture_path = f"{FIXTURES_PREFIX}/springshare_valid_dates.xml"
    transformer = SpringshareOaiDc("libguides", parse_xml_records(fixture_path))

    # every record in the fixture is expected to produce this single Date
    expected_dates = [
        timdex.Date(kind=None, note=None, range=None, value="2000-01-01T00:00:00")
    ]
    for record_xml in transformer.input_records:
        assert transformer.get_dates("test_get_dates", record_xml) == expected_dates


def test_springshare_get_dates_invalid(caplog):
    """
    Test that bad, missing, or blank dates return None and log a parse message
    """
    fixture_path = f"{FIXTURES_PREFIX}/springshare_invalid_dates.xml"
    transformer = SpringshareOaiDc("libguides", parse_xml_records(fixture_path))
    expected_message = "could not parse date for Springshare record"

    for record_xml in transformer.input_records:
        parsed_dates = transformer.get_dates("test_get_dates", record_xml)
        assert parsed_dates is None
        # NOTE(review): caplog.text accumulates across iterations, so once one
        # record has logged the message this check also passes for later records
        assert expected_message in caplog.text


def test_springshare_get_links_missing_identifier(caplog):
    """
    Test that get_links() returns None and logs when dc:identifier is absent
    """
    fixture_path = f"{FIXTURES_PREFIX}/springshare_record_missing_required_fields.xml"
    transformer = SpringshareOaiDc("libguides", parse_xml_records(fixture_path))
    expected_message = "cannot generate links for Springshare record"

    for record_xml in transformer.input_records:
        generated_links = transformer.get_links("test_get_links", record_xml)
        assert generated_links is None
        # NOTE(review): caplog.text accumulates across iterations, so this only
        # proves the message was logged for at least one record seen so far
        assert expected_message in caplog.text
30 changes: 7 additions & 23 deletions tests/test_oai_dc.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import logging

import transmogrifier.models as timdex
from transmogrifier.helpers import parse_xml_records
from transmogrifier.sources.oaidc import OaiDc

FIXTURES_PREFIX = "tests/fixtures/oai_dc"

BLANK_OR_MISSING_OPTIONAL_FIELDS_TIMDEX = timdex.TimdexRecord(
source="Libguides",
source="LibGuides",
source_link="https://libguides.mit.edu/guides/175846",
timdex_record_id="libguides:guides-175846",
title="Materials Science & Engineering",
citation="Materials Science & Engineering. Libguides. "
"https://libguides.mit.edu/materials",
citation="Materials Science & Engineering. libguides. "
"https://libguides.mit.edu/guides/175846",
content_type=["libguides"],
format="electronic resource",
identifiers=[
Expand All @@ -25,12 +23,12 @@ def test_oaidctransform_with_all_fields_transforms_correctly():
input_records = parse_xml_records(f"{FIXTURES_PREFIX}/oaidc_record_all_fields.xml")
output_records = OaiDc("libguides", input_records)
assert next(output_records) == timdex.TimdexRecord(
source="Libguides",
source="LibGuides",
source_link="https://libguides.mit.edu/guides/175846",
timdex_record_id="libguides:guides-175846",
title="Materials Science & Engineering",
citation="Materials Science & Engineering. Libguides. "
"https://libguides.mit.edu/materials",
citation="Ye Li. Materials Science & Engineering. MIT Libraries. libguides. "
"https://libguides.mit.edu/guides/175846",
content_type=["libguides"],
contributors=[
timdex.Contributor(
Expand Down Expand Up @@ -79,20 +77,6 @@ def test_oaidc_generic_date():
)
transformer_instance = OaiDc("libguides", input_records)
xml = next(transformer_instance.input_records)
assert transformer_instance.get_dates(xml) == [
assert transformer_instance.get_dates("test_source_record_id", xml) == [
timdex.Date(kind=None, note=None, range=None, value="2008-06-19T17:55:27")
]


def test_oaidc_missing_required_fields(caplog):
    """
    Test that a record missing required fields is counted, skipped, and logged
    """
    fixture_path = f"{FIXTURES_PREFIX}/oaidc_record_missing_required_fields.xml"
    transformer = OaiDc("libguides", parse_xml_records(fixture_path))

    # exhaust the transformer first so the processed count is final when checked
    transformed_records = list(transformer)
    assert len(transformed_records) == 0
    assert transformer.processed_record_count == 1

    expected_log_entry = (
        "transmogrifier.sources.oaidc",
        logging.ERROR,
        "dc:title or dc:identifier is missing or blank",
    )
    assert expected_log_entry in caplog.record_tuples
2 changes: 1 addition & 1 deletion transmogrifier/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
"transform-class": "transmogrifier.sources.datacite.Datacite",
},
"libguides": {
"name": "Libguides",
"name": "LibGuides",
"base-url": "https://libguides.mit.edu/",
"transform-class": "transmogrifier.sources.springshare.SpringshareOaiDc",
},
Expand Down
Loading

0 comments on commit 982f569

Please sign in to comment.