Skip to content

Commit

Permalink
Remove 'source_record_id' as param, replace with call inside field method instead
Browse files Browse the repository at this point in the history
  • Loading branch information
jonavellecuerdo committed May 24, 2024
1 parent 907b3cf commit 8ba0f31
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 55 deletions.
37 changes: 29 additions & 8 deletions tests/sources/xml/test_oai_dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,42 +152,63 @@ def test_get_contributors_transforms_correctly_if_fields_missing():

def test_get_dates_success():
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
),
metadata_insert=(
"""
<dc:date>2008-06-19T17:55:27</dc:date>
"""
)
),
)
assert OaiDc.get_dates(source_record=source_record, source_record_id="abc") == [
assert OaiDc.get_dates(source_record=source_record) == [
timdex.Date(kind="Unknown", value="2008-06-19T17:55:27")
]


def test_get_dates_transforms_correctly_if_fields_blank():
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
),
metadata_insert=(
"""
<dc:date></dc:date>
"""
)
),
)
assert OaiDc.get_dates(source_record=source_record, source_record_id="abc") is None
assert OaiDc.get_dates(source_record=source_record) is None


def test_get_dates_transforms_correctly_if_fields_missing():
source_record = create_oaidc_source_record_stub()
assert OaiDc.get_dates(source_record=source_record, source_record_id="abc") is None
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
)
)
assert OaiDc.get_dates(source_record=source_record) is None


def test_get_dates_transforms_correctly_if_date_invalid():
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
),
metadata_insert=(
"""
<dc:date>INVALID</dc:date>
"""
)
),
)
assert OaiDc.get_dates(source_record=source_record, source_record_id="abc") is None
assert OaiDc.get_dates(source_record=source_record) is None


def test_get_identifiers_success():
Expand Down
83 changes: 52 additions & 31 deletions tests/sources/xml/test_springshare.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,77 +44,87 @@ def create_oaidc_source_record_stub(

def test_get_dates_success():
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
),
metadata_insert=(
"""
<dc:date>January 1st, 2000</dc:date>
"""
)
),
)
assert SpringshareOaiDc.get_dates(
source_record=source_record, source_record_id="abc"
) == [timdex.Date(kind="Created", value="2000-01-01T00:00:00")]
assert SpringshareOaiDc.get_dates(source_record=source_record) == [
timdex.Date(kind="Created", value="2000-01-01T00:00:00")
]


def test_get_dates_transforms_correctly_if_optional_fields_blank():
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
),
metadata_insert=(
"""
<dc:date></dc:date>
"""
)
)
assert (
SpringshareOaiDc.get_dates(
source_record=source_record,
source_record_id="abc",
)
is None
),
)
assert SpringshareOaiDc.get_dates(source_record=source_record) is None


def test_get_dates_transforms_correctly_if_optional_fields_missing():
source_record = create_oaidc_source_record_stub()
assert (
SpringshareOaiDc.get_dates(
source_record=source_record,
source_record_id="abc",
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
)
is None
)
assert SpringshareOaiDc.get_dates(source_record=source_record) is None


def test_get_dates_transforms_correctly_and_logs_error_if_date_invalid(
caplog,
):
caplog.set_level(logging.DEBUG)
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
),
metadata_insert=(
"""
<dc:date>INVALID</dc:date>
"""
)
)
assert (
SpringshareOaiDc.get_dates(source_record=source_record, source_record_id="abc")
is None
),
)
assert SpringshareOaiDc.get_dates(source_record=source_record) is None
assert (
"Record ID abc has a date that cannot be parsed: Unknown string format: INVALID"
"Record ID guides/175846 has a date that cannot be parsed: Unknown string format: INVALID"
in caplog.text
)


def test_get_links_success():
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
),
metadata_insert=(
"""
<dc:identifier>https://libguides.mit.edu/materials</dc:identifier>
"""
)
),
)
assert SpringshareOaiDc(source="libguides", source_records=iter(())).get_links(
source_record=source_record,
source_record_id="abc",
source_record=source_record
) == [
timdex.Link(
kind="LibGuide URL",
Expand All @@ -126,25 +136,36 @@ def test_get_links_success():

def test_get_links_transforms_correctly_if_required_fields_blank():
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
),
metadata_insert=(
"""
<dc:identifier></dc:identifier>
"""
)
),
)
assert (
SpringshareOaiDc(source="libguides", source_records=iter(())).get_links(
source_record=source_record, source_record_id="abc"
source_record=source_record
)
is None
)


def test_get_links_transforms_correctly_if_required_fields_missing():
source_record = create_oaidc_source_record_stub()
source_record = create_oaidc_source_record_stub(
header_insert=(
"""
<identifier>oai:libguides.com:guides/175846</identifier>
"""
)
)
assert (
SpringshareOaiDc(source="libguides", source_records=iter(())).get_links(
source_record=source_record, source_record_id="abc"
source_record=source_record
)
is None
)
Expand Down
16 changes: 6 additions & 10 deletions transmogrifier/sources/xml/oaidc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
"""
fields: dict = {}

# extract source_record_id early for use and logging
source_record_id = self.get_source_record_id(source_record)

# alternate_titles: not set in this transformation

# call_numbers: not set in this transformation
Expand All @@ -45,7 +42,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
fields["contributors"] = self.get_contributors(source_record)

# dates
fields["dates"] = self.get_dates(source_record, source_record_id)
fields["dates"] = self.get_dates(source_record)

# edition: not set in this transformation

Expand All @@ -64,7 +61,7 @@ def get_optional_fields(self, source_record: Tag) -> dict | None:
# languages: not set in this transformation

# links
fields["links"] = self.get_links(source_record, source_record_id)
fields["links"] = self.get_links(source_record)

# literary_form: not set in this transformation

Expand Down Expand Up @@ -103,9 +100,7 @@ def get_contributors(cls, source_record: Tag) -> list[timdex.Contributor] | None
] or None

@classmethod
def get_dates(
cls, source_record: Tag, source_record_id: str
) -> list[timdex.Date] | None:
def get_dates(cls, source_record: Tag) -> list[timdex.Date] | None:
"""
Method to get TIMDEX "dates" field. This method is broken out to allow subclasses
to override.
Expand All @@ -118,6 +113,7 @@ def get_dates(
"""
dates = []
source_record_id = cls.get_source_record_id(source_record)
for date in source_record.find_all("dc:date", string=True):
date_value = str(date.string.strip())
if validate_date(date_value, source_record_id):
Expand All @@ -141,15 +137,15 @@ def get_identifiers(cls, source_record: Tag) -> list[timdex.Identifier] | None:
return identifiers or None

def get_links(
self, _source_record: Tag, _source_record_id: str
self,
_source_record: Tag,
) -> list[timdex.Link] | None:
"""
Method to get TIMDEX "links" field. This method is broken out to allow subclasses
to override.
Args:
source_record: A BeautifulSoup Tag representing a single OAI DC record in XML.
source_record_id: Source record ID.
"""
return [] or None

Expand Down
10 changes: 4 additions & 6 deletions transmogrifier/sources/xml/springshare.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ class SpringshareOaiDc(OaiDc):
"""

@classmethod
def get_dates(
cls, source_record: Tag, source_record_id: str
) -> list[timdex.Date] | None:
def get_dates(cls, source_record: Tag) -> list[timdex.Date] | None:
"""
Overrides OaiDc's default get_dates() logic for Springshare records.
Expand All @@ -39,6 +37,7 @@ def get_dates(
source_record_id: Source record ID.
"""
dates = []
source_record_id = cls.get_source_record_id(source_record)
if date := source_record.find("dc:date", string=True):
try:
date_iso_str = date_parser(str(date.string).strip()).isoformat()
Expand All @@ -55,9 +54,7 @@ def get_dates(
)
return dates or None

def get_links(
self, source_record: Tag, source_record_id: str
) -> list[timdex.Link] | None:
def get_links(self, source_record: Tag) -> list[timdex.Link] | None:
"""
Overrides OaiDc's default get_links() logic for Springshare records.
Expand All @@ -66,6 +63,7 @@ def get_links(
source_record_id: Source record ID.
"""
links = []
source_record_id = self.get_source_record_id(source_record)
if identifier := source_record.find("dc:identifier", string=True):
singular_source_name = self.source_name.rstrip("s")
links.append(
Expand Down

0 comments on commit 8ba0f31

Please sign in to comment.