Skip to content

Commit

Permalink
Add JSON crosswalk method
Browse files Browse the repository at this point in the history
Why these changes are being introduced:
* The transform should log warnings and ignore invalid codes in JSON crosswalks as it does for XML

How this addresses that need:
* Add method to extract names from JSON crosswalks that logs a warning for, and ignores, invalid codes
* Update transform to use this method
* Add corresponding unit tests
* Update unit tests to account for code changes

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/TIMX-18
  • Loading branch information
ehanson8 committed Jan 5, 2023
1 parent 7a65807 commit cefdc92
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 39 deletions.
31 changes: 28 additions & 3 deletions tests/test_marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,9 +531,7 @@ def test_marc_record_attribute_and_subfield_variations_transforms_correctly():
dates=[timdex.Date(kind="Publication date", value="2016")],
edition="a b",
holdings=[
timdex.Holding(
call_number="bb", collection="i", format="t", location="aa", note="g"
),
timdex.Holding(call_number="bb", note="g"),
timdex.Holding(
collection="j",
format="electronic resource",
Expand Down Expand Up @@ -756,6 +754,33 @@ def test_create_subfield_value_string_from_datafield_with_blank_values():
assert Marc.create_subfield_value_string_from_datafield(datafield, "ad") == ""


def test_json_crosswalk_code_to_name_returns_none_if_invalid(
    caplog, marc_content_type_crosswalk
):
    # A code absent from the crosswalk must yield None rather than the raw code.
    result = Marc.json_crosswalk_code_to_name(
        "wrong",
        marc_content_type_crosswalk,
        "record-01",
        "MARC field",
    )
    assert result is None
    # The warning must identify the record, the field, and the offending code.
    assert "Record #record-01 uses an invalid code in MARC field: wrong" in caplog.text


def test_json_crosswalk_code_to_name_returns_name(caplog, marc_content_type_crosswalk):
    # A code present in the crosswalk is translated to its human-readable name.
    name = Marc.json_crosswalk_code_to_name(
        "a",
        marc_content_type_crosswalk,
        "record-01",
        "MARC field",
    )
    assert name == "Language material"
    # The caplog fixture was requested but previously unused: verify that a
    # valid code does not trigger the invalid-code warning.
    assert "uses an invalid code" not in caplog.text


def test_loc_crosswalk_code_to_name_returns_none_if_invalid(
caplog, loc_country_crosswalk
):
Expand Down
104 changes: 68 additions & 36 deletions transmogrifier/sources/marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,15 @@ def get_optional_fields(self, xml: Tag) -> dict:
# citation not used in MARC

# content_type
if leader:
fields["content_type"] = [
marc_content_type_crosswalk.get(leader.string[6:7], leader.string[6:7])
]
if leader and (
content_type := Marc.json_crosswalk_code_to_name(
leader.string[6:7],
marc_content_type_crosswalk,
record_id,
"Leader/06",
)
):
fields["content_type"] = [content_type]

# contents
for datafield in xml.find_all("datafield", tag="505"):
Expand Down Expand Up @@ -225,38 +230,43 @@ def get_optional_fields(self, xml: Tag) -> dict:
holding_call_number_value = (
self.create_subfield_value_string_from_datafield(datafield, ["bb"])
)
holding_collection_value = ", ".join(
[
holdings_collection_crosswalk.get(
holding_collection_value, holding_collection_value
)
for holding_collection_value in (
self.create_subfield_value_list_from_datafield(datafield, "i")
)
]
)
holding_format_value = ", ".join(
[
holdings_format_crosswalk.get(
holding_format_value, holding_format_value
)
for holding_format_value in (
self.create_subfield_value_list_from_datafield(datafield, "t")
)
]
)
holding_location_value = ", ".join(
[
holdings_location_crosswalk.get(
holding_location_value, holding_location_value
)
for holding_location_value in (
self.create_subfield_value_list_from_datafield(
datafield, ["aa"]
)
)
]
)
crosswalked_collection_values = []
for holding_collection in self.create_subfield_value_list_from_datafield(
datafield, "i"
):
if crosswalked_collection_value := Marc.json_crosswalk_code_to_name(
holding_collection,
holdings_collection_crosswalk,
record_id,
"985 $i",
):
crosswalked_collection_values.append(crosswalked_collection_value)
holding_collection_value = ", ".join(crosswalked_collection_values)
crosswalked_format_values = []
for holding_format in self.create_subfield_value_list_from_datafield(
datafield, "t"
):
if crosswalked_format_value := Marc.json_crosswalk_code_to_name(
holding_format,
holdings_format_crosswalk,
record_id,
"985 $t",
):
crosswalked_format_values.append(crosswalked_format_value)
holding_format_value = ", ".join(crosswalked_format_values)
crosswalked_location_values = []
for holding_location in self.create_subfield_value_list_from_datafield(
datafield, ["aa"]
):
if crosswalked_location_value := Marc.json_crosswalk_code_to_name(
holding_location,
holdings_location_crosswalk,
record_id,
"985 $aa",
):
crosswalked_location_values.append(crosswalked_location_value)
holding_location_value = ", ".join(crosswalked_location_values)

holding_note_value = self.create_subfield_value_string_from_datafield(
datafield, "g", ", "
)
Expand Down Expand Up @@ -795,6 +805,28 @@ def create_subfield_value_string_from_datafield(
Marc.create_subfield_value_list_from_datafield(xml_element, subfield_codes)
)

@staticmethod
def json_crosswalk_code_to_name(
    code: str, crosswalk: dict, record_id: str, field_name: str
) -> Optional[str]:
    """
    Look up the name associated with a code in a JSON crosswalk.

    A code that is absent from the crosswalk (or mapped to None) is treated as
    invalid: a warning identifying the record, field, and code is logged and
    None is returned.

    Args:
        code: The code from a MARC record.
        crosswalk: The crosswalk dict to use, loaded from a config file.
        record_id: The MMS ID of the MARC record.
        field_name: The MARC field containing the code.
    """
    crosswalked_name = crosswalk.get(code)
    if crosswalked_name is not None:
        return crosswalked_name
    # Lazy %-style arguments let the logging framework do the formatting.
    logger.warning(
        "Record #%s uses an invalid code in %s: %s", record_id, field_name, code
    )
    return None

@staticmethod
def loc_crosswalk_code_to_name(
code: str, crosswalk: Tag, record_id: str, code_type: str
Expand Down

0 comments on commit cefdc92

Please sign in to comment.