Skip to content

Commit

Permalink
Updates based on discussion in PR #36
Browse files: browse the repository at this point in the history
* Remove subfield 0 from subject extraction code and add a " - " delimiter between subfields
* Update unit test values and fixtures to account for code changes
  • Loading branch information
ehanson8 committed Nov 15, 2022
1 parent 4e4644b commit 4da096c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 50 deletions.
15 changes: 0 additions & 15 deletions tests/fixtures/marc/marc_record_all_fields.xml
Original file line number Diff line number Diff line change
Expand Up @@ -88,50 +88,35 @@
<datafield tag="600" ind1="1" ind2="0">
<subfield code="a">Renoir, Jean,</subfield>
<subfield code="d">1894-1979</subfield>
<subfield code="0">(DLC)n 79018203</subfield>
<subfield code="v">Bibliography.</subfield>
<subfield code="0">(DLC)sh 99001362</subfield>
</datafield>
<datafield tag="600" ind1="1" ind2="7">
<subfield code="a">Renoir, Jean,</subfield>
<subfield code="d">1894-1979.</subfield>
<subfield code="2">fast</subfield>
<subfield code="0">(OCoLC)fst00031256</subfield>
</datafield>
<datafield tag="610" ind1="1" ind2="0">
<subfield code="a">United States.</subfield>
<subfield code="b">Federal Bureau of Investigation</subfield>
<subfield code="0">(DLC)n 78095617</subfield>
<subfield code="x">History.</subfield>
<subfield code="0">(DLC)sh 99005024</subfield>
</datafield>
<datafield tag="610" ind1="1" ind2="7">
<subfield code="a">United States.</subfield>
<subfield code="b">Federal Bureau of Investigation.</subfield>
<subfield code="2">fast</subfield>
<subfield code="0">(OCoLC)fst00528882</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="6">
<subfield code="a">Musique vocale sacrée</subfield>
<subfield code="0">(CaQQLa)201-0004748</subfield>
<subfield code="z">France</subfield>
<subfield code="0">(CaQQLa)201-0452039</subfield>
<subfield code="y">500-1400.</subfield>
<subfield code="0">(CaQQLa)201-0373671</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="0">
<subfield code="a">Sacred songs, Unaccompanied.</subfield>
</datafield>
<datafield tag="651" ind1=" " ind2="0">
<subfield code="a">Great Plains</subfield>
<subfield code="0">(DLC)sh 85056998</subfield>
<subfield code="x">Climate.</subfield>
<subfield code="0">(DLC)sh 00007747</subfield>
</datafield>
<datafield tag="651" ind1=" " ind2="7">
<subfield code="a">Great Plains.</subfield>
<subfield code="2">fast</subfield>
<subfield code="0">(OCoLC)fst01240567</subfield>
</datafield>
<datafield tag="700" ind1="1" ind2=" ">
<subfield code="a">Binelli, Daniel,</subfield>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,6 @@
<subfield code="x"></subfield>
<subfield code="y"></subfield>
<subfield code="z"></subfield>
<subfield code="0">0</subfield>
<subfield code="a">a</subfield>
<subfield code="b">b</subfield>
<subfield code="c">c</subfield>
Expand Down Expand Up @@ -247,7 +246,6 @@
<subfield code="x"></subfield>
<subfield code="y"></subfield>
<subfield code="z"></subfield>
<subfield code="0">0</subfield>
<subfield code="a">a</subfield>
<subfield code="b">b</subfield>
<subfield code="c">c</subfield>
Expand All @@ -274,12 +272,10 @@
<datafield tag="650">
<subfield></subfield>
<subfield code=""></subfield>
<subfield code="0"></subfield>
<subfield code="v"></subfield>
<subfield code="x"></subfield>
<subfield code="y"></subfield>
<subfield code="z"></subfield>
<subfield code="0">0</subfield>
<subfield code="a">a</subfield>
<subfield code="v">v</subfield>
<subfield code="x">x</subfield>
Expand All @@ -289,12 +285,10 @@
<datafield tag="651">
<subfield></subfield>
<subfield code=""></subfield>
<subfield code="0"></subfield>
<subfield code="v"></subfield>
<subfield code="x"></subfield>
<subfield code="y"></subfield>
<subfield code="z"></subfield>
<subfield code="0">0</subfield>
<subfield code="a">a</subfield>
<subfield code="v">v</subfield>
<subfield code="x">x</subfield>
Expand Down
41 changes: 17 additions & 24 deletions tests/test_marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,44 +151,31 @@ def test_marc_record_all_fields_transform_correctly():
literary_form="Nonfiction",
subjects=[
timdex.Subject(
value=[
"Renoir, Jean, 1894-1979 (DLC)n 79018203 Bibliography. "
"(DLC)sh 99001362"
],
value=["Renoir, Jean, - 1894-1979 - Bibliography"],
kind="Personal Name",
),
timdex.Subject(
value=["Renoir, Jean, 1894-1979. (OCoLC)fst00031256"],
value=["Renoir, Jean, - 1894-1979"],
kind="Personal Name",
),
timdex.Subject(
value=[
"United States. Federal Bureau of Investigation (DLC)n 78095617 "
"History. (DLC)sh 99005024"
],
value=["United States. - Federal Bureau of Investigation - History"],
kind="Corporate Name",
),
timdex.Subject(
value=[
"United States. Federal Bureau of Investigation. (OCoLC)fst00528882"
],
value=["United States. - Federal Bureau of Investigation"],
kind="Corporate Name",
),
timdex.Subject(
value=[
"Musique vocale sacrée (CaQQLa)201-0004748 France "
"(CaQQLa)201-0452039 500-1400. (CaQQLa)201-0373671"
],
value=["Musique vocale sacrée - France - 500-1400"],
kind="Topical Term",
),
timdex.Subject(value=["Sacred songs, Unaccompanied"], kind="Topical Term"),
timdex.Subject(
value=["Great Plains (DLC)sh 85056998 Climate. (DLC)sh 00007747"],
value=["Great Plains - Climate"],
kind="Geographic Name",
),
timdex.Subject(
value=["Great Plains. (OCoLC)fst01240567"], kind="Geographic Name"
),
timdex.Subject(value=["Great Plains"], kind="Geographic Name"),
],
summary=[
"This safety guide provides guidance on meeting the requirements for the "
Expand Down Expand Up @@ -275,15 +262,21 @@ def test_marc_record_attribute_and_subfield_variations_transforms_correctly():
literary_form="Fiction",
subjects=[
timdex.Subject(
value=["0 a b c d e f g h j k l m n o p q r s t u v x y z"],
value=[
"a - b - c - d - e - f - g - h - j - k - l - m - n - o - p - q - r - "
"s - t - u - v - x - y - z"
],
kind="Personal Name",
),
timdex.Subject(
value=["0 a b c d e f g h k l m n o p r s t u v x y z"],
value=[
"a - b - c - d - e - f - g - h - k - l - m - n - o - p - r - s - t - "
"u - v - x - y - z"
],
kind="Corporate Name",
),
timdex.Subject(value=["0 a v x y z"], kind="Topical Term"),
timdex.Subject(value=["0 a v x y z"], kind="Geographic Name"),
timdex.Subject(value=["a - v - x - y - z"], kind="Topical Term"),
timdex.Subject(value=["a - v - x - y - z"], kind="Geographic Name"),
],
summary=["a"],
)
Expand Down
10 changes: 5 additions & 5 deletions transmogrifier/sources/marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,22 +240,22 @@ def get_optional_fields(self, xml: Tag) -> dict:
subject_marc_fields = [
{
"tag": "600",
"subfields": "0abcdefghjklmnopqrstuvxyz",
"subfields": "abcdefghjklmnopqrstuvxyz",
"kind": "Personal Name",
},
{
"tag": "610",
"subfields": "0abcdefghklmnoprstuvxyz",
"subfields": "abcdefghklmnoprstuvxyz",
"kind": "Corporate Name",
},
{
"tag": "650",
"subfields": "0avxyz",
"subfields": "avxyz",
"kind": "Topical Term",
},
{
"tag": "651",
"subfields": "0avxyz",
"subfields": "avxyz",
"kind": "Geographic Name",
},
]
Expand All @@ -265,7 +265,7 @@ def get_optional_fields(self, xml: Tag) -> dict:
self.create_subfield_value_string_from_datafield(
datafield,
subject_marc_field["subfields"],
" ",
" - ",
)
):
fields.setdefault("subjects", []).append(
Expand Down

0 comments on commit 4da096c

Please sign in to comment.