Skip to content

Commit

Permalink
Merge 2b88c9e into 650a821
Browse files Browse the repository at this point in the history
  • Loading branch information
egabancho committed Jul 7, 2017
2 parents 650a821 + 2b88c9e commit 28bdce5
Show file tree
Hide file tree
Showing 20 changed files with 1,315 additions and 1,103 deletions.
2 changes: 1 addition & 1 deletion cds_dojson/marc21/fields/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def recid(self, key, value):
@marc21.over('agency_code', '^003')
def agency_code(self, key, value):
    """Control number identifier.

    :param key: MARC key matched by the rule (``003``).
    :param value: content of the matched field.
    :returns: ``value``, or ``'SzGeCERN'`` when ``value`` is empty.
    """
    # Only the post-change return is kept: the previous bare
    # ``return value`` made the fallback below unreachable.
    return value or 'SzGeCERN'


@marc21.over('modification_date', '^005')
Expand Down
19 changes: 19 additions & 0 deletions cds_dojson/marc21/fields/videos/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""CDS Video fields."""
195 changes: 195 additions & 0 deletions cds_dojson/marc21/fields/videos/video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# -*- coding: utf-8 -*-
#
# This file is part of CERN Document Server.
# Copyright (C) 2017 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""Video fields."""

import re

import requests
from dojson.utils import filter_values, for_each_value, force_list

from ...models.videos.video import model

# Required fields


@model.over('title', '^245_[1_]')
@filter_values
def title(self, key, value):
    """Build the title object out of the ``245`` field.

    Subfield ``a`` is stored under ``title`` and subfield ``b`` under
    ``subtitle``.
    """
    main_title = value.get('a')
    secondary_title = value.get('b')
    return dict(title=main_title, subtitle=secondary_title)


@model.over('description', '^520__')
def description(self, key, value):
    """Return the description found in subfield ``a`` of field ``520``."""
    summary = value.get('a')
    return summary


@model.over('date', '^269__')
def date(self, key, value):
    """Return the date found in subfield ``c`` of field ``269``."""
    record_date = value.get('c')
    return record_date


@model.over('publication_date', '937__')
def publication_date(self, key, value):
    """Return the publication date found in subfield ``c`` of field ``937``.

    The value is returned as found in the record, without normalization.
    """
    # TODO: normalize data
    published = value.get('c')
    return published


def _get_author_info_from_people_collection(info):
"""Get author information from CDS auto-completion endpoint."""
# TODO: probably we will need to extract this somewhere else
URL = 'https://cds.cern.ch/submit/get_authors?query={0}&relative_curdir=cdslabs%2Fvideos'
if '0' in info or not info.get('a'):
# There is already enough information or we don't have a name to query
return info
author_info = requests.get(URL.format(info.get('a'))).json()
if not author_info or len(author_info) > 1:
# Didn't find anything or find to many matches
return info

# Prepare author name
author_info = author_info[0]
if 'name' not in author_info:
author_info['name'] = '{0}, {1}'.format(author_info['lastname'],
author_info['firstname'])
return author_info


def _get_correct_role(role):
"""Clean up roles."""
# TODO: decide on roles and values
return role


def _extract_json_ids(info):
"""."""
SOURCES = {
'AUTHOR|(INSPIRE)': 'INSPIRE',
'AUTHOR|(CDS)': 'CDS',
'(SzGeCERN)': 'CERN'
}
regex = re.compile('((AUTHOR\|\((CDS|INSPIRE)\))|(\(SzGeCERN\)))(.*)')
ids = []
for id_ in info.get('0', []):
match = regex.match(id_)
if not match:
pass
ids.append({
'value': match.group(5),
'source': SOURCES[match.group(1)]
})
# Try and get the IDs from the auto-completion
try:
ids.append({'value': info['cernccid'], 'source': 'CERN'})
except KeyError:
pass
try:
ids.append({'value': info['recid'], 'source': 'CDS'})
except KeyError:
pass
try:
ids.append({'value': info['inspireid'], 'source': 'INSPIRE'})
except KeyError:
pass

return ids


@filter_values
def _build_contributor(value):
    """Create a contributor object from the subfields of one author.

    The subfields first go through
    ``_get_author_info_from_people_collection``; that is why each entry is
    looked up both by its MARC subfield code and by a plain key name.
    """
    person = _get_author_info_from_people_collection(value)

    identifiers = _extract_json_ids(person)
    full_name = person.get('a') or person.get('name')
    institutes = person.get('u') or person.get('affiliation')

    return dict(
        ids=identifiers or None,
        name=full_name,
        affiliations=force_list(institutes),
        role=_get_correct_role(person.get('e')),
        email=person.get('email'),
    )


@model.over('contributors', '^(100|700)__')
def contributors(self, key, value):
    """Contributors.

    One contributor is built per matched value and appended to the
    contributors already stored in the record, so that the ``100`` and
    ``700`` fields end up in the same list.
    """
    collected = self.get('contributors', [])
    collected.extend(
        _build_contributor(entry) for entry in force_list(value))
    return collected


@model.over('report_number', '^(037|088)__')
@for_each_value
def report_number(self, key, value):
    """Report number.

    The number is taken from subfield ``a`` or, failing that, from
    subfield ``9``. Category and type are also derived from the report
    number.
    """
    number = value.get('a') or value.get('9')
    if not number or not key.startswith('037__'):
        return number

    # Extract category and type only from main report number, i.e. 037__a
    # NOTE: the unpacking raises ValueError when the report number has
    # fewer than two dash-separated parts.
    category, type_ = number.split('-')[:2]
    self['category'] = category
    self['type'] = type_
    return number


@model.over('duration', '^300__')
def duration(self, key, value):
    """Duration.

    The new duration must be expressed in the form hh:mm:ss[.mmm]; only the
    ``hh:mm:ss`` part of subfield ``a`` is kept.

    :param key: MARC key matched by the rule.
    :param value: dict-like with the subfields of the ``300`` field.
    :returns: the ``hh:mm:ss`` string, or ``None`` when subfield ``a`` is
        missing or does not start with that pattern.
    """
    raw_duration = value.get('a')
    if not raw_duration:
        # Without this guard ``re.match`` raises TypeError on ``None``
        return None

    match = re.match(r'(\d{2}:\d{2}:\d{2})(\.\d+)?', raw_duration)
    if match is None:
        # The regex didn't match
        # TODO: should we try to match something else?
        return None
    return match.group(1)


# Access
@model.over('_access', '(^859__)|(^506[1_]_)')
def access(self, key, value):
    """Access rights.

    It includes read/update access.

    - 859__f contains the email of the submitter.
    - 506__m/5061_d list of groups or emails of people who can access the
      record. The groups are in the form <group-name> [CERN] which needs to
      be transformed into the email form.
    """
    rights = self.get('_access', {})
    for field in force_list(value):
        if key == '859__' and 'f' in field:
            rights.setdefault('update', []).append(field.get('f'))
        elif key.startswith('506'):
            readers = rights.setdefault('read', [])
            candidates = force_list(field.get('d') or field.get('m', ''))
            readers.extend([
                candidate.replace(' [CERN]', '@cern.ch')
                for candidate in candidates if candidate
            ])
    return rights
2 changes: 2 additions & 0 deletions cds_dojson/marc21/models/videos/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class CDSVideo(OverdoJSONSchema):

__schema__ = 'records/videos/video/video-v1.0.0.json'

__ignore_keys__ = {'035__9', '035__a', '5061_2', '5061_5', '5061_a'}


# Video model instance; the video field rules import it and attach
# themselves to it with ``@model.over(...)``.
model = CDSVideo(bases=(cds_base, ),
entry_point_group='cds_dojson.marc21.video')
4 changes: 2 additions & 2 deletions cds_dojson/overdo.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class Overdo(DoJSONOverdo):
__query__ = ''
"""To be used by the matcher to find the proper model."""

__ignore_keys__ = []
__ignore_keys__ = set()
"""List of keys which don't need transformation."""

def over(self, name, *source_tags, **kwargs):
Expand All @@ -89,7 +89,7 @@ def override(rule):

def missing(self, blob, **kwargs):
    """Return keys with missing rules.

    :param blob: record handed to ``not_accessed_keys`` to find the keys
        that were not accessed.
    :returns: set with the not accessed keys of ``blob`` which are not
        listed in ``__ignore_keys__``.
    """
    # A plain difference is used on purpose: a symmetric difference would
    # also report every ignored key that is not in the blob at all.
    # Wrapping in ``set`` keeps list-based ``__ignore_keys__`` working.
    ignored = self.__class__.__ignore_keys__
    return set(not_accessed_keys(blob)).difference(ignored)


Expand Down
Loading

0 comments on commit 28bdce5

Please sign in to comment.