Skip to content

Commit

Permalink
books: journals migration rules
Browse files Browse the repository at this point in the history
* journal record match queries
* ignored fields
  • Loading branch information
kpsherva committed Jul 21, 2020
1 parent 1806a5b commit 3d45219
Show file tree
Hide file tree
Showing 10 changed files with 561 additions and 30 deletions.
164 changes: 164 additions & 0 deletions cds_dojson/marc21/fields/books/journal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2020 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Journal fields."""
import re

import pycountry
from dojson.utils import for_each_value

from cds_dojson.marc21.fields.books.book import title as base_title
from cds_dojson.marc21.fields.books.errors import UnexpectedValue
from cds_dojson.marc21.fields.books.multipart import \
number_of_volumes as multipart_number_of_volumes
from cds_dojson.marc21.fields.utils import clean_val, filter_list_values, \
out_strip
from cds_dojson.marc21.models.books.journal import model


@model.over('legacy_recid', '^001')
def recid(self, key, value):
    """Convert the 001 control field into the integer legacy record id."""
    legacy_recid = int(value)
    return legacy_recid


@model.over('title', '^245__')
@out_strip
def title(self, key, value):
    """Translate the 245__ title by delegating to the book title rule."""
    translated = base_title(self, key, value)
    return translated


@model.over('alternative_titles', '^246_3')
@filter_list_values
def alternative_titles(self, key, value):
    """Translate 246_3 into alternative title entries.

    Entries are appended to the ``alternative_titles`` list already stored
    on the record (or to a new list when there is none yet): subfield ``a``
    yields an ``ALTERNATIVE_TITLE`` entry and subfield ``b`` a ``SUBTITLE``
    entry, in that order.
    """
    collected = self.get('alternative_titles', [])

    # (subfield code, type written to the output entry), in output order.
    subfield_types = (
        ('a', 'ALTERNATIVE_TITLE'),
        ('b', 'SUBTITLE'),
    )
    for code, title_type in subfield_types:
        if code not in value:
            continue
        collected.append({
            'value': clean_val(code, value, str, req=True),
            'type': title_type,
        })
    return collected


@model.over('abbreviated_title', '^210__')
@out_strip
def abbreviated_title(self, key, value):
    """Translate 210__ subfield ``a`` (required) into the abbreviated title."""
    short_title = clean_val('a', value, str, req=True)
    return short_title


@model.over('identifiers', '^022__')
@filter_list_values
@for_each_value
def identifiers(self, key, value):
    """Translate a 022__ field into an ISSN identifier entry.

    Subfield ``a`` (required) is the identifier value; subfield ``b``
    (optional) is stored as the ``material`` of the identifier.
    """
    issn = clean_val('a', value, str, req=True)
    material = clean_val('b', value, str)
    return {
        'scheme': 'ISSN',
        'value': issn,
        'material': material,
    }


@model.over('internal_notes', '^937__')
@for_each_value
@out_strip
def internal_notes(self, key, value):
    """Translate a 937__ field into an internal note entry.

    Subfield ``a`` is required and becomes the ``value`` of the entry.
    """
    note_text = clean_val('a', value, str, req=True)
    return {'value': note_text}


@model.over('note', '^935__')
@out_strip
def note(self, key, value):
    """Translate 935__ subfield ``a`` (required) into the record note."""
    note_text = clean_val('a', value, str, req=True)
    return note_text


@model.over('publisher', '^933__')
@out_strip
def publisher(self, key, value):
    """Translate 933__ subfield ``b`` (required) into the publisher."""
    publisher_name = clean_val('b', value, str, req=True)
    return publisher_name


@model.over('languages', '^041__')
@for_each_value
@out_strip
def languages(self, key, value):
    """Translate a 041__ language field into a two-letter language code.

    :param key: the matched MARC field tag.
    :param value: subfields of the field; the language is read from ``a``.
    :returns: the ``alpha_2`` code of the language found by
        ``pycountry.languages.lookup``.
    :raises UnexpectedValue: when subfield ``a`` is missing, is not a known
        language, or the language has no ``alpha_2`` code.
    """
    lang = clean_val('a', value, str)
    try:
        # ``lower()`` is called inside the ``try`` on purpose: when subfield
        # ``a`` is absent ``clean_val`` presumably returns None (it is not
        # called with ``req=True`` -- confirm), and the resulting
        # AttributeError must be reported as UnexpectedValue like every
        # other unusable language value instead of escaping unhandled.
        return pycountry.languages.lookup(lang.lower()).alpha_2
    except (KeyError, AttributeError, LookupError):
        raise UnexpectedValue(subfield='a')


@model.over('_migration', '(^362__)|(^85641)|(^866__)|(^780__)|(^785__)')
def migration(self, key, value):
    """Collect the data used to build records related to the journal.

    Starts from the ``_migration`` dict already stored on the record (or an
    empty one) and returns it with these keys set:

    * ``electronic_items`` -- one entry per 362__ and per 85641 field,
    * ``items`` -- one entry per 866__ field,
    * ``relation_previous`` -- subfield ``w`` (required) of 780__,
    * ``relation_next`` -- subfield ``w`` (required) of 785__.
    """
    migration_data = self.get('_migration', {})
    electronic = migration_data.get('electronic_items', [])
    physical = migration_data.get('items', [])
    previous_recid = migration_data.get('relation_previous')
    next_recid = migration_data.get('relation_next')

    if key == '362__':
        # TODO fix - the values can't all come from the same subfield
        electronic_entry = {
            'start_volume': clean_val('a', value, str),
            'end_volume': clean_val('a', value, str),
            'year': clean_val('a', value, str),
        }
        electronic.append(electronic_entry)
    elif key == '85641':
        # TODO fix - the values can't all come from the same subfield
        electronic_entry = {
            'start_volume': clean_val('3', value, str),
            'end_volume': clean_val('3', value, str),
            'year': clean_val('3', value, str),
            'url': clean_val('u', value, str),
            'access_type': clean_val('x', value, str),
            'note': clean_val('z', value, str),
        }
        electronic.append(electronic_entry)
    elif key == '866__':
        physical_entry = {
            'start_volume': clean_val('a', value, str),
            'end_volume': clean_val('a', value, str),
            'year': clean_val('a', value, str),
            'location': clean_val('b', value, str),
        }
        physical.append(physical_entry)
    elif key == '780__':
        previous_recid = clean_val('w', value, str, req=True)
    elif key == '785__':
        next_recid = clean_val('w', value, str, req=True)

    # Always write all four keys back, whichever field was handled.
    migration_data['electronic_items'] = electronic
    migration_data['items'] = physical
    migration_data['relation_previous'] = previous_recid
    migration_data['relation_next'] = next_recid
    return migration_data
2 changes: 1 addition & 1 deletion cds_dojson/marc21/fields/books/serial.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ def title(self, key, value):
@for_each_value
def identifiers(self, key, value):
"""Translates identifiers fields."""
multipart_identifiers(self, key, value)
return multipart_identifiers(self, key, value)
1 change: 0 additions & 1 deletion cds_dojson/marc21/fields/books/standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def title_translations(self, key, value):
'language': 'fr',
})
return _alternative_titles
# 'source': clean_val('i', value, str),


@model.over('number_of_pages', '^300__') # item
Expand Down
37 changes: 12 additions & 25 deletions cds_dojson/marc21/models/books/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,31 +45,18 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
exception_handlers=exception_handlers)
json['$schema'] = self.__class__.__schema__

if '_migration' in json:
json['_migration'].setdefault('record_type', 'document')
json['_migration'].setdefault('volumes', [])
json['_migration'].setdefault('serials', [])
json['_migration'].setdefault('has_serial', False)
json['_migration'].setdefault('is_multipart', False)
json['_migration'].setdefault('has_tags', False)
json['_migration'].setdefault('has_related', False)
json['_migration'].setdefault('has_journal', False)
json['_migration'].setdefault('tags', [])
json['_migration'].setdefault('journal_record_legacy_recid', '')

else:
json['_migration'] = {
'record_type': 'document',
'has_serial': False,
'is_multipart': False,
'has_tags': False,
'has_related': False,
'has_journal': False,
'journal_record_legacy_recid': '',
'volumes': [],
'serials': [],
'tags': [],
}
if '_migration' not in json:
json['_migration'] = {}
json['_migration'].setdefault('record_type', 'document')
json['_migration'].setdefault('volumes', [])
json['_migration'].setdefault('serials', [])
json['_migration'].setdefault('has_serial', False)
json['_migration'].setdefault('is_multipart', False)
json['_migration'].setdefault('has_tags', False)
json['_migration'].setdefault('has_related', False)
json['_migration'].setdefault('has_journal', False)
json['_migration'].setdefault('tags', [])
json['_migration'].setdefault('journal_record_legacy_recid', '')

return json

Expand Down
72 changes: 72 additions & 0 deletions cds_dojson/marc21/models/books/journal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2020 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Journal model."""

from __future__ import unicode_literals

from ..base import model as cds_base
from .base import COMMON_IGNORE_FIELDS, CDSOverdoBookBase
from .base import model as books_base


class CDSJournal(CDSOverdoBookBase):
    """Translation index for CDS journal records."""

    __query__ = '980__:PERI -980__:DELETED -980__:MIGRATED'

    __schema__ = 'https://127.0.0.1:5000/schemas/series/series-v1.0.0.json'

    # Journal-specific fields that are ignored, on top of the common ones.
    __model_ignore_keys__ = {
        '780__i',  # label of relation continues
        '780__t',  # title of relation continues
        '785__i',  # label of relation continued by
        '785__t',  # title of relation continued by
        '85641y',
        '933__a',
        '960__c',
        '980__a',
    }

    __ignore_keys__ = COMMON_IGNORE_FIELDS | __model_ignore_keys__

    def do(self, blob, ignore_missing=True, exception_handlers=None):
        """Translate ``blob`` and complete the result for a journal.

        After the parent translation, ``$schema`` is set to the schema of
        this model and every ``_migration`` key that the field rules did
        not produce is filled with its default value.
        """
        record = super(CDSJournal, self).do(
            blob=blob,
            ignore_missing=ignore_missing,
            exception_handlers=exception_handlers)
        record['$schema'] = self.__class__.__schema__
        if '_migration' not in record:
            record['_migration'] = {}

        # Built on every call so the list defaults are never shared
        # between translated records.
        migration_defaults = (
            ('record_type', 'journal'),
            ('volumes', []),
            ('is_multipart', False),
            ('has_related', False),
            ('items', []),
            ('electronic_items', []),
            ('relation_previous', None),
            ('relation_next', None),
        )
        migration = record['_migration']
        for name, default in migration_defaults:
            migration.setdefault(name, default)

        return record


# Journal model instance used by the journal field rules (``model.over``).
# NOTE(review): the rules are presumably discovered through the
# 'cds_dojson.marc21.series' entry point group -- confirm in setup.py.
model = CDSJournal(
bases=(),
entry_point_group='cds_dojson.marc21.series')
2 changes: 1 addition & 1 deletion cds_dojson/marc21/models/books/standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class CDSStandard(CDSOverdoBookBase):

__query__ = '690C_:STANDARD OR 980__:STANDARD -980__:DELETED -980__:MIGRATED'

__schema__ = 'records/books/book/book-v.0.0.1.json'
__schema__ = 'https://127.0.0.1:5000/schemas/documents/document-v1.0.0.json'

__ignore_keys__ = COMMON_IGNORE_FIELDS

Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
'cds_dojson.marc21.parent_models': [
'books_serial = cds_dojson.marc21.models.books.serial:model',
'books_multipart = cds_dojson.marc21.models.books.multipart:model',
'journal = cds_dojson.marc21.models.books.journal:model',
],
'cds_dojson.marc21.models': [
'videos_video = cds_dojson.marc21.models.videos.video:model',
Expand Down Expand Up @@ -131,6 +132,7 @@
'cds_dojson.marc21.series': [
'serial = cds_dojson.marc21.fields.books.serial',
'multipart = cds_dojson.marc21.fields.books.multipart',
'journal = cds_dojson.marc21.fields.books.journal',
],
# DoJSON entry points
'console_scripts': [
Expand Down

0 comments on commit 3d45219

Please sign in to comment.