Skip to content

Commit

Permalink
utils: add ignore keys
Browse files Browse the repository at this point in the history
* Adds list of ignored keys for each of the models, plus a custom
  dictionary to keep track of the keys that were accessed to better
  calculate the list of missing keys.
  • Loading branch information
egabancho committed Jul 4, 2017
1 parent 7182cbf commit 0e51c32
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 86 deletions.
3 changes: 1 addition & 2 deletions cds_dojson/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2016 CERN.
# Copyright (C) 2016, 2017 CERN.
#
# CERN Document Server is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -37,7 +37,6 @@
@click.group()
def cli():
"""CDS dojson CLI."""
pass


@cli.command()
Expand Down
9 changes: 5 additions & 4 deletions cds_dojson/marc21/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -16,13 +16,14 @@
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""Utilities for converting MARC21."""

from lxml import etree
from six import StringIO, binary_type, text_type
from dojson.contrib.marc21.utils import split_stream, MARC21_DTD

from ..utils import MementoDict


def create_record(marcxml, correct=False, keep_singletons=True):
"""Create a record object using the LXML parser.
Expand Down Expand Up @@ -81,9 +82,9 @@ def create_record(marcxml, correct=False, keep_singletons=True):

if fields or keep_singletons:
key = '{0}{1}{2}'.format(tag, ind1, ind2)
record.append((key, dict(fields)))
record.append((key, MementoDict(fields)))

return dict(record)
return MementoDict(record)


def load(source):
Expand Down
32 changes: 15 additions & 17 deletions cds_dojson/overdo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -16,14 +16,13 @@
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""Base classes for CDS DoJSON."""

import pkg_resources
from dojson.contrib.to_marc21.model import Underdo as DoJSONUnderdo
from dojson.overdo import Overdo as DoJSONOverdo

from .matcher import matcher
from .utils import not_accessed_keys

try:
pkg_resources.get_distribution('flask')
Expand All @@ -37,8 +36,10 @@
class OverdoBase(DoJSONOverdo):
"""Base entry class."""

def __init__(
self, bases=None, entry_point_group=None, entry_point_models=None):
def __init__(self,
bases=None,
entry_point_group=None,
entry_point_models=None):
"""Init."""
super(OverdoBase, self).__init__(bases, entry_point_group)
self.entry_point_models = entry_point_models
Expand All @@ -62,6 +63,9 @@ class Overdo(DoJSONOverdo):
__query__ = ''
"""To be used by the matcher to find the proper model."""

__ignore_keys__ = []
"""List of keys which don't need transformation."""

def over(self, name, *source_tags, **kwargs):
"""Register creator rule.
Expand All @@ -83,6 +87,11 @@ def override(rule):

return super(Overdo, self).over(name, *source_tags)

def missing(self, blob, **kwargs):
    """Return the keys of ``blob`` that no rule has consumed.

    Keys listed in the model's ``__ignore_keys__`` are known not to need
    a transformation, so they are left out of the result.

    :param blob: record to inspect; it is handed to
        ``not_accessed_keys`` to find the keys that were never read.
    :param kwargs: accepted for interface compatibility and not used.
    :returns: set of not-accessed keys that are not ignored.
    """
    # A plain difference: an ignored key must never show up as missing,
    # whether or not it is present in (or was read from) the blob.
    ignored = set(self.__class__.__ignore_keys__)
    return set(not_accessed_keys(blob)).difference(ignored)


class OverdoJSONSchema(Overdo):
"""Translation index which adds $schema key."""
Expand All @@ -95,8 +104,7 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
json = super(Overdo, self).do(
blob=blob,
ignore_missing=ignore_missing,
exception_handlers=exception_handlers
)
exception_handlers=exception_handlers)
if HAS_FLASK:
json_schema = current_app.extensions['invenio-jsonschemas']
json['$schema'] = {
Expand All @@ -106,13 +114,3 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
json['$schema'] = {'$ref': self.__class__.__schema__}

return json

def missing(self, blob, **kwargs):
"""Return keys with missing rules."""
return super(OverdoJSONSchema, self).missing(blob)


class Underdo(Overdo, DoJSONUnderdo):
"""Translation index specification for reverse marc21 translation."""

pass
70 changes: 66 additions & 4 deletions cds_dojson/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -16,16 +16,53 @@
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""The CDS DoJson Utils."""

import functools
from collections import defaultdict

try:
    from collections.abc import MutableMapping, MutableSequence
except ImportError:  # Python 2
    from collections import MutableMapping, MutableSequence

import arrow
import six


class MementoDict(dict):
    """Dictionary that remembers which of its keys have been accessed.

    A key is recorded in ``self.memory`` when it is read through ``[]``
    or ``get()``, and while iterating over ``iteritems()``/``items()``
    unless the iteration is asked to skip the recording.
    """

    def __init__(self, *args, **kwargs):
        """Create the access memory, then build the dictionary as usual."""
        self.memory = set()
        super(MementoDict, self).__init__(*args, **kwargs)

    def iteritems(self, skyp_memento=False):
        """Yield ``(key, value)`` pairs, recording each key on the way.

        :param skyp_memento: when true, iterate without recording the
            keys as accessed.
        """
        # Ask the parent ``dict`` for its pairs directly so this override
        # is not re-entered; this works on both Python 2 and Python 3.
        for key, value in super(MementoDict, self).items():
            if not skyp_memento:
                self.memory.add(key)
            yield (key, value)

    # ``items`` is the same generator, so it records keys as well.
    items = iteritems

    def __getitem__(self, key):
        """Record ``key`` as accessed and return its value.

        The key is recorded before the lookup, so it is remembered even
        when the lookup itself fails.
        """
        self.memory.add(key)
        return super(MementoDict, self).__getitem__(key)

    def get(self, key, default=None):
        """Record ``key`` as accessed and return its value or ``default``."""
        self.memory.add(key)
        return super(MementoDict, self).get(key, default)

    @property
    def accessed_keys(self):
        """Return the set of keys recorded so far (the live set, not a copy)."""
        return self.memory

    @property
    def not_accessed_keys(self):
        """Return a new set with the stored keys that were never recorded."""
        return set(self.keys()).difference(self.memory)


def for_each_squash(f):
"""In case of non repeatable field squash them into one.
Expand All @@ -45,12 +82,37 @@ def wrapper(self, key, values, **kwargs):
for key, element in six.iteritems(unmerged_dict):
merge_dict[key].append(element)

merge_dict = {key: (value if len(value) > 1 else value[0])
for key, value in six.iteritems(merge_dict)}
merge_dict = {
key: (value if len(value) > 1 else value[0])
for key, value in six.iteritems(merge_dict)
}
return merge_dict

return wrapper


def not_accessed_keys(blob):
    """Collect the keys of ``blob`` that have never been read.

    Mappings are expected to behave like ``MementoDict``: they must expose
    a ``not_accessed_keys`` attribute and an ``iteritems`` method accepting
    ``skyp_memento``. Mutable sequences are walked element by element and
    any other value contributes nothing.

    When a nested value reports keys of its own, each is returned prefixed
    with the parent key (``'{parent}{child}'``) and the bare parent key is
    dropped from the result.

    :param blob: a ``MementoDict``-like mapping, a list, or any other value.
    :returns: a new set with the not-accessed keys.
    """
    if isinstance(blob, MutableMapping):
        # Work on a copy so the set handed out by the blob is never
        # modified by this function.
        missing = set(blob.not_accessed_keys)
        # Iterate without recording, otherwise inspecting the blob would
        # itself mark every key as accessed.
        for key, value in blob.iteritems(skyp_memento=True):
            nested = not_accessed_keys(value)
            if nested:
                missing.update('{0}{1}'.format(key, sub) for sub in nested)
                # The more specific nested keys replace the parent key.
                missing.discard(key)
        return missing

    missing = set()
    if isinstance(blob, MutableSequence):
        for value in blob:
            missing.update(not_accessed_keys(value))
    return missing


def convert_date_to_iso_8601(date, format_='YYYY-MM-DD', **kwargs):
"""Convert a date string its ISO 8601 representation.
Expand Down
18 changes: 1 addition & 17 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -18,15 +18,10 @@
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Test fixtures."""

import os

import pkg_resources
import pytest
from flask import Flask
from invenio_jsonschemas import InvenioJSONSchemas

from cds_dojson.marc21.utils import create_record


@pytest.fixture()
def app():
Expand All @@ -35,14 +30,3 @@ def app():
app_.config.update(TESTING=True)
InvenioJSONSchemas(app_)
return app_


@pytest.fixture()
def marcxml_to_json(app, request):
"""Load marcxml file and return the JSON."""
file_, model = request.param

marcxml = pkg_resources.resource_string(__name__,
os.path.join('fixtures', file_))
with app.app_context():
return model.do(create_record(marcxml))
4 changes: 1 addition & 3 deletions tests/demo/json_resolver.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CDS.
# Copyright (C) 2016 CERN.
# Copyright (C) 2016, 2017 CERN.
#
# CDS is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -37,8 +37,6 @@
host='cdslabs.cern.ch')
def resolve_definitions(path):
"""Resolve the JSON definition schema."""
# import pytest
# pytest.set_trace()
with open(pkg_resources.resource_filename(
'cds_dojson.schemas', path), 'r') as f:
return json.load(f)
26 changes: 18 additions & 8 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,25 @@
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Base model tests."""

import pytest
import os

import pkg_resources
from cds_dojson.marc21.models.base import model
from cds_dojson.marc21.utils import create_record


@pytest.mark.parametrize(
'marcxml_to_json', [('base.xml', model)], indirect=True)
def test_base_model(app, marcxml_to_json):
def test_base_model(app):
"""Test base model."""
record = marcxml_to_json
assert record['recid'] == 1495143
assert record['agency_code'] == 'SzGeCERN'
assert record['modification_date'] == '20170316170631.0'
marcxml = pkg_resources.resource_string(__name__,
os.path.join(
'fixtures', 'base.xml'))

with app.app_context():
blob = create_record(marcxml)
assert model.missing(blob) == {'001', '003', '005'}

record = model.do(blob)
assert record['recid'] == 1495143
assert record['agency_code'] == 'SzGeCERN'
assert record['modification_date'] == '20170316170631.0'
assert not model.missing(blob)
Loading

0 comments on commit 0e51c32

Please sign in to comment.