Skip to content

Commit

Permalink
books: fixed record match queries
Browse files Browse the repository at this point in the history
* fixed dates and pages types in the output
  • Loading branch information
kpsherva committed Jul 8, 2020
1 parent f3664d9 commit 746653c
Show file tree
Hide file tree
Showing 11 changed files with 110 additions and 84 deletions.
38 changes: 21 additions & 17 deletions cds_dojson/marc21/fields/books/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import pycountry
from dateutil import parser
from dateutil.parser import ParserError
from dojson.errors import IgnoreKey
from dojson.utils import filter_values, flatten, for_each_value, force_list

Expand Down Expand Up @@ -433,7 +434,7 @@ def alternative_identifiers(self, key, value):
if field_type and field_type.lower() == 'doi':
# if 0247__2 == doi it is a DOI identifier
self['identifiers'] = dois(self, key, value)
raise IgnoreKey('external_system_identifiers')
raise IgnoreKey('alternative_identifiers')
elif field_type and field_type.lower() == 'asin':
indentifier_entry.update({'value': sub_a,
'scheme': 'ASIN'})
Expand Down Expand Up @@ -616,6 +617,7 @@ def languages(self, key, value):
@out_strip
def subject_classification(self, key, value):
"""Translates subject classification field."""
prev_subjects = self.get('subjects', [])
_subject_classification = {'value': clean_val('a', value, str, req=True)}
if key == '080__':
_subject_classification.update({'scheme': 'UDC'})
Expand All @@ -630,7 +632,10 @@ def subject_classification(self, key, value):
_subject_classification.update({'scheme': 'ICS'})
elif key.startswith('050'):
_subject_classification.update({'scheme': 'LoC'})
return _subject_classification
if _subject_classification not in prev_subjects:
return _subject_classification
else:
raise IgnoreKey('subjects')


@model.over('keywords', '(^084__)|(^6531_)')
Expand All @@ -644,14 +649,14 @@ def keywords(self, key, value):
sub_2 = clean_val('2', value, str)
if sub_2 and sub_2 == 'PACS':
_keywords.append({
'name': clean_val('a', value, str, req=True),
'value': clean_val('a', value, str, req=True),
'source': 'PACS',
})
else:
raise IgnoreKey('keywords')
elif key == '6531_':
_keywords.append({
'name': clean_val('a', value, str),
'value': clean_val('a', value, str),
'source': value.get('9') or value.get('g'),
# Easier to solve here
})
Expand Down Expand Up @@ -737,8 +742,11 @@ def imprint(self, key, value):
reprint = clean_val('g', value, str)
if reprint:
reprint = reprint.lower().replace('repr.', '').strip()
date = parser.parse(clean_val('c', value, str, req=True))
self['publication_year'] = date.date().year
try:
date = parser.parse(clean_val('c', value, str, req=True))
except ParserError:
raise UnexpectedValue(subfield='c')
self['publication_year'] = str(date.date().year)
return {
'date': clean_val('c', value, str, req=True),
'place': clean_val('a', value, str),
Expand Down Expand Up @@ -768,22 +776,18 @@ def book_series(self, key, value):


@model.over('note', '^500__')
@filter_values
@out_strip
def note(self, key, value):
"""Translates public notes."""
# merge all found notes
_note = self.get('note', {})
_value = _note.get('value', '')
if _value:
_value = \
"{0} / {1}".format(_value, clean_val('a', value, str, req=True))
_note = self.get('note', "")
if _note:
_note = \
"{0} / {1}".format(_note, clean_val('a', value, str, req=True))
else:
_value = clean_val('a', value, str, req=True)
_note = clean_val('a', value, str, req=True)

return {
'value': _value,
'source': clean_val('9', value, str)
}
return _note


@model.over('alternative_abstracts', '^520__')
Expand Down
2 changes: 1 addition & 1 deletion cds_dojson/marc21/fields/books/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def number_of_pages(self, key, value):
if parts['physical_copy_description']:
self['physical_copy_description'] = parts['physical_copy_description']
if parts['number_of_pages']:
return parts['number_of_pages']
return str(parts['number_of_pages'])
raise UnexpectedValue(subfield='a')


Expand Down
11 changes: 7 additions & 4 deletions cds_dojson/marc21/fields/books/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,16 @@ def isbns(self, key, value):


@model.over('title', '^245__')
@filter_values
@out_strip
def title(self, key, value):
"""Translates book series title."""
# assume that it goes by order of fields and check 245 first
return {'title': clean_val('a', value, str),
'subtitle': clean_val('b', value, str),
}
if 'b' in value:
_alternative_titles = self.get('alternative_titles', [])
_alternative_titles.append({'type': 'SUBTITLE',
'value': clean_val('b', value, str)})
self['alternative_titles'] = _alternative_titles
return clean_val('a', value, str)


@model.over('_migration', '^246__')
Expand Down
6 changes: 3 additions & 3 deletions cds_dojson/marc21/fields/books/serial.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from dojson.utils import filter_values, for_each_value

from cds_dojson.marc21.fields.books.errors import UnexpectedValue
from cds_dojson.marc21.fields.utils import clean_val
from cds_dojson.marc21.fields.utils import clean_val, out_strip
from cds_dojson.marc21.models.books.serial import model


Expand All @@ -32,7 +32,7 @@ def recid(self, key, value):

@model.over('title', '^490__')
@for_each_value
@filter_values
@out_strip
def title(self, key, value):
"""Translates book series title."""
_identifiers = self.get('identifiers', [])
Expand All @@ -41,4 +41,4 @@ def title(self, key, value):
_identifiers.append({'scheme': 'ISSN', 'value': issn})
self['identifiers'] = _identifiers
self['mode_of_issuance'] = 'SERIAL'
return {'title': clean_val('a', value, str, req=True)}
return clean_val('a', value, str, req=True)
4 changes: 3 additions & 1 deletion cds_dojson/marc21/models/books/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ class CDSBook(CDSOverdoBookBase):

__query__ = '690C_:BOOK OR 690C_:"YELLOW REPORT" OR ' \
'690C_:BOOKSUGGESTION OR 980__:PROCEEDINGS OR 980__:PERI OR ' \
'697C_:LEGSERLIB OR 697C_:"ENGLISH BOOK CLUB" -980__:DELETED'
'(-980:STANDARD 980:BOOK) OR ' \
'697C_:LEGSERLIB ' \
'-980__:DELETED -980__:MIGRATED -980:__STANDARD' \

__schema__ = 'https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json'

Expand Down
3 changes: 2 additions & 1 deletion cds_dojson/marc21/models/books/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class CDSMultipart(CDSOverdoBookBase):

__query__ = '(690C_:BOOK OR 690C_:"YELLOW REPORT" OR ' \
'690C_:BOOKSUGGESTION OR 980__:PROCEEDINGS OR 980__:PERI OR ' \
'697C_:LEGSERLIB OR 697C_:"ENGLISH BOOK CLUB" -980__:DELETED)'\
'697C_:LEGSERLIB -980__:DELETED -980__:MIGRATED)'\
'AND 246__:/[a-zA-Z0-9]+/ '

__schema__ = 'https://127.0.0.1:5000/schemas/series/series-v1.0.0.json'
Expand Down Expand Up @@ -67,6 +67,7 @@ class CDSMultipart(CDSOverdoBookBase):
'082__a',
'084__2',
'084__a',
'084__c',
'088__9',
'088__a',
'100__a',
Expand Down
2 changes: 1 addition & 1 deletion cds_dojson/marc21/models/books/serial.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class CDSSerial(CDSOverdoBookBase):

__query__ = '(690C_:BOOK OR 690C_:"YELLOW REPORT" OR ' \
'690C_:BOOKSUGGESTION OR 980__:PROCEEDINGS OR 980__:PERI OR ' \
'697C_:LEGSERLIB OR 697C_:"ENGLISH BOOK CLUB" -980__:DELETED)' \
'697C_:LEGSERLIB -980__:DELETED -980__:MIGRATED)' \
' AND 490__:/[a-zA-Z0-9]+/'

__schema__ = 'https://127.0.0.1:5000/schemas/series/series-v1.0.0.json'
Expand Down
2 changes: 1 addition & 1 deletion cds_dojson/marc21/models/books/standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
class CDSStandard(CDSOverdoBookBase):
"""Translation Index for CDS Books."""

__query__ = '690C_:STANDARD -980__:DELETED'
__query__ = '690C_:STANDARD OR 980__:STANDARD -980__:DELETED -980__:MIGRATED'

__schema__ = 'records/books/book/book-v.0.0.1.json'

Expand Down
31 changes: 12 additions & 19 deletions tests/test_books.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,10 @@ def test_subject_classification(app):
</datafield>
""", {
'keywords': [
{'name': '13.75.Jz', 'source': 'PACS'},
{'name': '13.60.Rj', 'source': 'PACS'},
{'name': '14.20.Jn', 'source': 'PACS'},
{'name': '25.80.Nv', 'source': 'PACS'},
{'value': '13.75.Jz', 'source': 'PACS'},
{'value': '13.60.Rj', 'source': 'PACS'},
{'value': '14.20.Jn', 'source': 'PACS'},
{'value': '25.80.Nv', 'source': 'PACS'},
]
}
)
Expand Down Expand Up @@ -1626,7 +1626,7 @@ def test_imprint(app):
<subfield code="g">2015</subfield>
</datafield>
""", {
'publication_year': 2013,
'publication_year': "2013",
'imprint': {
'place': 'Sydney',
'publisher': 'Allen & Unwin',
Expand Down Expand Up @@ -1681,15 +1681,15 @@ def test_number_of_pages(app):
<subfield code="a">373 p</subfield>
</datafield>
""", {
'number_of_pages': 373,
'number_of_pages': "373",
})
check_transformation(
"""
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">480 p. ; 1 CD-ROM suppl</subfield>
</datafield>
""", {
'number_of_pages': 480,
'number_of_pages': "480",
'physical_copy_description': '1 CD-ROM'
})
check_transformation(
Expand All @@ -1698,7 +1698,7 @@ def test_number_of_pages(app):
<subfield code="a">42 p. ; 2 CD-ROM ; 1 DVD, 1 vhs</subfield>
</datafield>
""", {
'number_of_pages': 42,
'number_of_pages': "42",
'physical_copy_description': '2 CD-ROM, 1 DVD, 1 VHS'
})
check_transformation(
Expand Down Expand Up @@ -2209,10 +2209,7 @@ def test_note(app):
</datafield>
""",
{
'note':
{
'value': """Translated from ... / No CD-ROM"""
},
'note': """Translated from ... / No CD-ROM"""
}
)
check_transformation(
Expand All @@ -2224,11 +2221,7 @@ def test_note(app):
</subfield>
</datafield>
""", {
'note':
{
'value': """Comments: Book, 380 p.,""",
'source': 'arXiv',
},
'note': """Comments: Book, 380 p.,"""

}
)
Expand Down Expand Up @@ -2504,7 +2497,7 @@ def test_541(app):
{'value': "practice and application",
'type': 'SUBTITLE'}
],
'recid': 2654497,
'legacy_recid': 2654497,
'isbns': [
{
'medium': "print version, hardback",
Expand Down Expand Up @@ -2571,7 +2564,7 @@ def test_keywords(app):
""",
{
'keywords': [
{'name': 'Keyword Name 1', 'source': 'PACS'},
{'value': 'Keyword Name 1', 'source': 'PACS'},
],
}
)
Expand Down
7 changes: 5 additions & 2 deletions tests/test_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

import pytest
from dojson.contrib import marc21 as default

from cds_dojson.marc21.models.books import book, standard, serial, multipart
Expand Down Expand Up @@ -70,7 +70,10 @@ def test_marc21_matcher_books():

assert book.model == matcher(book_blob1, 'cds_dojson.marc21.models')
assert book.model == matcher(book_blob2, 'cds_dojson.marc21.models')
assert book.model == matcher(book_blob3, 'cds_dojson.marc21.models')

with pytest.raises(AssertionError):
# English book club should not be matched
assert book.model == matcher(book_blob3, 'cds_dojson.marc21.models')
assert standard.model == matcher(standard_blob1,
'cds_dojson.marc21.models')
assert serial.model == matcher(serial_blob1,
Expand Down

0 comments on commit 746653c

Please sign in to comment.