Skip to content

Commit

Permalink
ignore keys
Browse files Browse the repository at this point in the history
  • Loading branch information
egabancho committed Jul 4, 2017
1 parent 6dc4ea9 commit b2e27a6
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 87 deletions.
3 changes: 1 addition & 2 deletions cds_dojson/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2016 CERN.
# Copyright (C) 2016, 2017 CERN.
#
# CERN Document Server is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -37,7 +37,6 @@
@click.group()
def cli():
"""CDS dojson CLI."""
pass


@cli.command()
Expand Down
9 changes: 5 additions & 4 deletions cds_dojson/marc21/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -16,13 +16,14 @@
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""Utilities for converting MARC21."""

from lxml import etree
from six import StringIO, binary_type, text_type
from dojson.contrib.marc21.utils import split_stream, MARC21_DTD

from ..utils import MementoDict


def create_record(marcxml, correct=False, keep_singletons=True):
"""Create a record object using the LXML parser.
Expand Down Expand Up @@ -80,9 +81,9 @@ def create_record(marcxml, correct=False, keep_singletons=True):

if fields or keep_singletons:
key = '{0}{1}{2}'.format(tag, ind1, ind2)
record.append((key, dict(fields)))
record.append((key, MementoDict(fields)))

return dict(record)
return MementoDict(record)


def load(source):
Expand Down
33 changes: 16 additions & 17 deletions cds_dojson/overdo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -16,14 +16,13 @@
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""Base classes for CDS DoJSON."""

import pkg_resources
from dojson.contrib.to_marc21.model import Underdo as DoJSONUnderdo
from dojson.overdo import Overdo as DoJSONOverdo

from .matcher import matcher
from .utils import not_accessed_keys

try:
pkg_resources.get_distribution('flask')
Expand All @@ -37,8 +36,10 @@
class OverdoBase(DoJSONOverdo):
"""Base entry class."""

def __init__(
self, bases=None, entry_point_group=None, entry_point_models=None):
def __init__(self,
bases=None,
entry_point_group=None,
entry_point_models=None):
"""Init."""
super(OverdoBase, self).__init__(bases, entry_point_group)
self.entry_point_models = entry_point_models
Expand All @@ -62,6 +63,9 @@ class Overdo(DoJSONOverdo):
__query__ = ''
"""To be used by the matcher to find the proper model."""

__ignore_keys__ = []
"""List of keys which don't need transformation."""

def over(self, name, *source_tags, **kwargs):
"""Register creator rule.
Expand All @@ -70,6 +74,7 @@ def over(self, name, *source_tags, **kwargs):
regular expression in `source_tags` are equal to the current
ones.
"""

def override(rule):
if name == rule[1][0]:
return True
Expand All @@ -83,6 +88,11 @@ def override(rule):

return super(Overdo, self).over(name, *source_tags)

def missing(self, blob, **kwargs):
    """Return the keys of ``blob`` that still lack a rule.

    The result is the set of keys that have not been accessed in ``blob``
    (as computed by ``not_accessed_keys``) minus the keys listed in the
    class-level ``__ignore_keys__``.

    :param blob: the record to inspect; it is handed unchanged to
        ``not_accessed_keys``.
    :param kwargs: accepted for signature compatibility and not used.
    :returns: a ``set`` with the not accessed, not ignored keys.
    """
    # A plain difference is required here: a symmetric difference would
    # also return every ignored key that is absent from the blob, i.e. it
    # would report as "missing" keys that were never in the record.
    ignored = self.__class__.__ignore_keys__
    return set(not_accessed_keys(blob)).difference(ignored)


class OverdoJSONSchema(Overdo):
"""Translation index which adds $schema key."""
Expand All @@ -95,8 +105,7 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
json = super(Overdo, self).do(
blob=blob,
ignore_missing=ignore_missing,
exception_handlers=exception_handlers
)
exception_handlers=exception_handlers)
if HAS_FLASK:
json_schema = current_app.extensions['invenio-jsonschemas']
json['$schema'] = {
Expand All @@ -106,13 +115,3 @@ def do(self, blob, ignore_missing=True, exception_handlers=None):
json['$schema'] = {'$ref': self.__class__.__schema__}

return json

def missing(self, blob, **kwargs):
"""Return keys with missing rules."""
return super(OverdoJSONSchema, self).missing(blob)


class Underdo(Overdo, DoJSONUnderdo):
"""Translation index specification for reverse marc21 translation."""

pass
69 changes: 64 additions & 5 deletions cds_dojson/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -16,23 +16,57 @@
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""The CDS DoJson Utils."""

import functools
from collections import defaultdict

from collections import MutableMapping, MutableSequence
import arrow
import six


class MementoDict(dict):
    """Dictionary that remembers which keys have been accessed.

    A key is recorded as accessed when it is read through ``[]``, through
    :meth:`get`, or while iterating :meth:`items` / :meth:`iteritems`
    (unless that iteration is asked to skip the bookkeeping).
    """

    def __init__(self, *args, **kwargs):
        """Build the dictionary like ``dict`` does, with an empty memory."""
        self.memory = set()
        super(MementoDict, self).__init__(*args, **kwargs)

    def iteritems(self, skyp_memento=False):
        """Yield ``(key, value)`` pairs, recording every yielded key.

        :param skyp_memento: when true the keys are *not* recorded, so the
            content can be inspected without altering the memory.
        """
        # ``dict.iteritems`` only exists on Python 2. ``__iter__`` and
        # ``__getitem__`` of the parent class exist on both major versions
        # and do not go through the recording overrides of this class.
        plain = super(MementoDict, self)
        for key in plain.__iter__():
            if not skyp_memento:
                self.memory.add(key)
            yield (key, plain.__getitem__(key))

    # ``items`` is the same generator-based implementation.
    items = iteritems

    def __getitem__(self, key):
        """Return ``self[key]`` and record ``key`` as accessed."""
        # The key is recorded before the lookup, so it is remembered even
        # when the lookup raises ``KeyError``.
        self.memory.add(key)
        return super(MementoDict, self).__getitem__(key)

    def get(self, key, default=None):
        """Return the value for ``key`` (or ``default``) and record ``key``."""
        self.memory.add(key)
        return super(MementoDict, self).get(key, default)

    @property
    def accessed_keys(self):
        """Get the set of accessed keys (the internal set, not a copy)."""
        return self.memory

    @property
    def not_accessed_keys(self):
        """Get the set of keys present in the dictionary but never accessed."""
        return set(self.keys()).difference(self.memory)


def for_each_squash(f):
"""In case of non repeatable field squash them into one.
.. example::
[{'a': 'foo'}, {'b': 'bar'}] -> {'a': 'foo', 'b': 'barc'}
[{'a': 'foo'}, {'a': 'bar'}] -> {'a': ['foo', 'barc']}
"""

@functools.wraps(f)
def wrapper(self, key, values, **kwargs):
if not isinstance(values, list):
Expand All @@ -45,16 +79,41 @@ def wrapper(self, key, values, **kwargs):
for key, element in six.iteritems(unmerged_dict):
merge_dict[key].append(element)

merge_dict = {key: (value if len(value) > 1 else value[0])
for key, value in six.iteritems(merge_dict)}
merge_dict = {
key: (value if len(value) > 1 else value[0])
for key, value in six.iteritems(merge_dict)
}
return merge_dict

return wrapper


def not_accessed_keys(blob):
    """Collect the keys of ``blob`` that have not been accessed.

    Mappings are expected to expose ``not_accessed_keys`` and
    ``iteritems(skyp_memento=...)`` (the ``MementoDict`` interface); lists
    are walked element by element. Any other value contributes nothing.

    A key found inside a nested value is reported prefixed with the key
    that holds it, and that holding key itself is dropped from the result.

    :param blob: a ``MementoDict``, a list, or a leaf value.
    :returns: a ``set`` of not accessed keys.
    """
    if isinstance(blob, MutableMapping):
        pending = blob.not_accessed_keys
        # Iterate without recording, otherwise the inspection itself would
        # mark every key as accessed.
        for field, content in blob.iteritems(skyp_memento=True):
            nested = not_accessed_keys(content)
            if not nested:
                continue
            pending.update(
                ['{0}{1}'.format(field, sub) for sub in nested])
            # The nested entries replace the bare holding key.
            pending.discard(field)
        return pending

    pending = set()
    if isinstance(blob, MutableSequence):
        for item in blob:
            pending.update(not_accessed_keys(item))
    return pending


def convert_date_to_iso_8601(date, format_='YYYY-MM-DD', **kwargs):
"""Convert a date string its ISO 8601 representation.
YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
YYYY-MM-DDThh:mm:ss.sTZD (eg
1997-07-16T19:20:30.45+01:00)
YYYY = four-digit year
MM = two-digit month (01=January, etc.)
Expand Down
18 changes: 1 addition & 17 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015 CERN.
# Copyright (C) 2015, 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
Expand All @@ -17,15 +17,10 @@
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02D111-1307, USA.

import os

import pkg_resources
import pytest
from flask import Flask
from invenio_jsonschemas import InvenioJSONSchemas

from cds_dojson.marc21.utils import create_record


@pytest.fixture()
def app():
Expand All @@ -34,14 +29,3 @@ def app():
app_.config.update(TESTING=True)
InvenioJSONSchemas(app_)
return app_


@pytest.fixture()
def marcxml_to_json(app, request):
"""Load marcxml file and return the JSON."""
file_, model = request.param

marcxml = pkg_resources.resource_string(__name__,
os.path.join('fixtures', file_))
with app.app_context():
return model.do(create_record(marcxml))
4 changes: 1 addition & 3 deletions tests/demo/json_resolver.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# This file is part of CDS.
# Copyright (C) 2016 CERN.
# Copyright (C) 2016, 2017 CERN.
#
# CDS is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -35,8 +35,6 @@
host='cdslabs.cern.ch')
def resolve_definitions(path):
"""Resolve the JSON definition schema."""
# import pytest
# pytest.set_trace()
with open(pkg_resources.resource_filename(
'cds_dojson.schemas', path), 'r') as f:
return json.load(f)
26 changes: 18 additions & 8 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,25 @@
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Base model tests."""

import pytest
import os

import pkg_resources
from cds_dojson.marc21.models.base import model
from cds_dojson.marc21.utils import create_record


@pytest.mark.parametrize(
'marcxml_to_json', [('base.xml', model)], indirect=True)
def test_base_model(app, marcxml_to_json):
def test_base_model(app):
"""Test base model."""
record = marcxml_to_json
assert record['recid'] == 1495143
assert record['agency_code'] == 'SzGeCERN'
assert record['modification_date'] == '20170316170631.0'
marcxml = pkg_resources.resource_string(__name__,
os.path.join(
'fixtures', 'base.xml'))

with app.app_context():
blob = create_record(marcxml)
assert model.missing(blob) == {'001', '003', '005'}

record = model.do(blob)
assert record['recid'] == 1495143
assert record['agency_code'] == 'SzGeCERN'
assert record['modification_date'] == '20170316170631.0'
assert not model.missing(blob)
Loading

0 comments on commit b2e27a6

Please sign in to comment.