Skip to content

Commit

Permalink
TIMX-18-marc-transform-even-more-fields
Browse files Browse the repository at this point in the history
Why these changes are being introduced:
* Even more fields need to be added to the get_optional_fields method

How this addresses that need:
* Add code to get_optional_fields to extract the locations, numbering, physical_description, and publication_frequency fields
* Update the MARC unit tests and fixtures to account for the new fields

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/TIMX-18
  • Loading branch information
ehanson8 committed Nov 15, 2022
1 parent 313665a commit 27ebc36
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 0 deletions.
38 changes: 38 additions & 0 deletions tests/fixtures/marc/marc_record_all_fields.xml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,29 @@
<subfield code="b">Aparte,</subfield>
<subfield code="c">[2016]</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">484 p. :</subfield>
<subfield code="b">ill. ;</subfield>
<subfield code="c">30 cm. +</subfield>
<subfield code="e">1 CD-ROM (4 3/4 in.).</subfield>
<subfield code="e">1 DVD-ROM (4 3/4 in.).</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">1 vocal score (248 p.) ;</subfield>
<subfield code="c">31 cm.</subfield>
</datafield>
<datafield tag="310" ind1=" " ind2=" ">
<subfield code="a">Six no. a year</subfield>
</datafield>
<datafield tag="310" ind1=" " ind2=" ">
<subfield code="a">Three times a year</subfield>
</datafield>
<datafield tag="362" ind1="0" ind2=" ">
<subfield code="a">-Bd. 148, 4 (dez. 1997).</subfield>
</datafield>
<datafield tag="362" ind1="1" ind2=" ">
<subfield code="a">Began in 1902.</subfield>
</datafield>
<datafield tag="505" ind1="0" ind2=" ">
<subfield code="a">Die Seejungfrau : sinfonische Dichtung : (Fantasie nach Hans Christian Andersen) (44:29) -- Sinfonietta, op. 23 (22:05).</subfield>
</datafield>
Expand Down Expand Up @@ -172,5 +195,20 @@
<subfield code="n">Part 1</subfield>
<subfield code="n">Part 2</subfield>
</datafield>
<datafield tag="751" ind1=" " ind2=" ">
<subfield code="a">Germany</subfield>
</datafield>
<datafield tag="751" ind1=" " ind2=" ">
<subfield code="a">Austria</subfield>
</datafield>
<datafield tag="752" ind1=" " ind2=" ">
<subfield code="a">Africa</subfield>
<subfield code="g">Nile River</subfield>
<subfield code="g">Sixth Cataract.</subfield>
</datafield>
<datafield tag="752" ind1=" " ind2=" ">
<subfield code="a">Austria</subfield>
<subfield code="d">Vienna</subfield>
</datafield>
</record>
</collection>
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,27 @@
<subfield code="a">a</subfield>
<subfield code="b">b</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code=""></subfield>
<subfield code="a"></subfield>
<subfield code="c"></subfield>
<subfield code="f"></subfield>
<subfield code="g"></subfield>
<subfield code="a">a</subfield>
<subfield code="b">b</subfield>
<subfield code="c">c</subfield>
<subfield code="e">e</subfield>
<subfield code="f">f</subfield>
<subfield code="g">g</subfield>
</datafield>
<datafield tag="310" ind1=" " ind2=" ">
<subfield code=""></subfield>
<subfield code="a"></subfield>
</datafield>
<datafield tag="362" ind1="1" ind2=" ">
<subfield code=""></subfield>
<subfield code="a"></subfield>
</datafield>
<datafield tag="505">
<subfield></subfield>
<subfield code=""></subfield>
Expand Down Expand Up @@ -379,5 +400,28 @@
<subfield code="n">n</subfield>
<subfield code="p">p</subfield>
</datafield>
<datafield tag="751">
<subfield></subfield>
<subfield code=""></subfield>
<subfield code="a"></subfield>
</datafield>
<datafield tag="752">
<subfield></subfield>
<subfield code=""></subfield>
<subfield code="a"></subfield>
<subfield code="c"></subfield>
<subfield code="e"></subfield>
<subfield code="f"></subfield>
<subfield code="g"></subfield>
<subfield code="h"></subfield>
<subfield code="a">a</subfield>
<subfield code="b">b</subfield>
<subfield code="c">c</subfield>
<subfield code="d">d</subfield>
<subfield code="e">e</subfield>
<subfield code="f">f</subfield>
<subfield code="g">g</subfield>
<subfield code="h">h</subfield>
</datafield>
</record>
</collection>
5 changes: 5 additions & 0 deletions tests/fixtures/marc/marc_record_blank_optional_fields.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
<datafield tag="245"></datafield>
<datafield tag="246"></datafield>
<datafield tag="250"></datafield>
<datafield tag="300"></datafield>
<datafield tag="310"></datafield>
<datafield tag="362"></datafield>
<datafield tag="505"></datafield>
<datafield tag="520"></datafield>
<datafield tag="600"></datafield>
Expand All @@ -25,5 +28,7 @@
<datafield tag="711"></datafield>
<datafield tag="730"></datafield>
<datafield tag="740"></datafield>
<datafield tag="751"></datafield>
<datafield tag="752"></datafield>
</record>
</collection>
22 changes: 22 additions & 0 deletions tests/test_marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,21 @@ def test_marc_record_all_fields_transform_correctly():
],
edition="9th ed. / Nick Ray ... [et al.]. Unabridged.",
literary_form="Nonfiction",
locations=[
timdex.Location(value="Germany", kind="Geographic Name"),
timdex.Location(value="Austria", kind="Geographic Name"),
timdex.Location(
value="Africa - Nile River - Sixth Cataract",
kind="Hierarchical Place Name",
),
timdex.Location(value="Austria - Vienna", kind="Hierarchical Place Name"),
],
numbering="-Bd. 148, 4 (dez. 1997). Began in 1902.",
physical_description=(
"484 p. : ill. ; 30 cm. + 1 CD-ROM (4 3/4 in.). 1 DVD-ROM "
"(4 3/4 in.). 1 vocal score (248 p.) ; 31 cm."
),
publication_frequency=["Six no. a year", "Three times a year"],
subjects=[
timdex.Subject(
value=["Renoir, Jean, - 1894-1979 - Bibliography"],
Expand Down Expand Up @@ -260,6 +275,13 @@ def test_marc_record_attribute_and_subfield_variations_transforms_correctly():
],
edition="a b",
literary_form="Fiction",
locations=[
timdex.Location(
value="a - b - c - d - e - f - g - h",
kind="Hierarchical Place Name",
)
],
physical_description="a b c e f g",
subjects=[
timdex.Subject(
value=[
Expand Down
50 changes: 50 additions & 0 deletions transmogrifier/sources/marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,14 +221,64 @@ def get_optional_fields(self, xml: Tag) -> dict:
fields["literary_form"] = "Fiction"

# locations
location_marc_fields = [
{
"tag": "751",
"subfields": "a",
"kind": "Geographic Name",
},
{
"tag": "752",
"subfields": "abcdefgh",
"kind": "Hierarchical Place Name",
},
]
for location_marc_field in location_marc_fields:
for datafield in xml.find_all("datafield", tag=location_marc_field["tag"]):
if location_value := (
self.create_subfield_value_string_from_datafield(
datafield,
location_marc_field["subfields"],
" - ",
)
):
fields.setdefault("locations", []).append(
timdex.Location(
value=location_value.rstrip(" .,/)"),
kind=location_marc_field["kind"],
)
)

# notes

# numbering
numbering_values = []
for datafield in xml.find_all("datafield", tag="362"):
if numbering_value := self.create_subfield_value_string_from_datafield(
datafield, "a", " "
):
numbering_values.append(numbering_value)
fields["numbering"] = " ".join(numbering_values) or None

# physical_description
physical_description_values = []
for datafield in xml.find_all("datafield", tag="300"):
if physical_description_value := (
self.create_subfield_value_string_from_datafield(
datafield, "abcefg", " "
)
):
physical_description_values.append(physical_description_value)
fields["physical_description"] = " ".join(physical_description_values) or None

# publication_frequency
for datafield in xml.find_all("datafield", tag="310"):
if publication_frequency_value := (
self.create_subfield_value_string_from_datafield(datafield, "a", " ")
):
fields.setdefault("publication_frequency", []).append(
publication_frequency_value
)

# publication_information

Expand Down

0 comments on commit 27ebc36

Please sign in to comment.