Skip to content

Commit

Permalink
Improve JSON validation and error dict construction
Browse files Browse the repository at this point in the history
  • Loading branch information
mark-saeon committed Jul 5, 2018
1 parent 2fea33b commit 026b6df
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 19 deletions.
67 changes: 59 additions & 8 deletions ckanext/metadata/jsonschema_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,44 @@
checks_format = jsonschema.FormatChecker.cls_checks


def validate(instance, schema):
    """Validate a JSON instance against a JSON schema and collect the errors.

    Empty elements are stripped from ``instance`` (in place) before it is
    validated, so the caller's object is modified by this call.

    :param instance: the deserialized JSON document to validate
    :param schema: the deserialized JSON schema to validate against
    :returns: a nested dict of errors that mirrors the structure of the
        instance. Each key is an element of the path to the offending value,
        and each leaf is a list of error messages. Messages that belong to a
        node itself, rather than to one of its children, are listed under
        the ``u'__global'`` key of that node; this includes errors with an
        empty path, which land at the top level of the result.
    """

    def is_empty(value):
        # Only null and zero-length strings/containers count as empty.
        # 0 and False are legitimate JSON values and must reach the validator.
        if value is None:
            return True
        # type(u'') covers the unicode text type on Python 2.
        return isinstance(value, (dict, list, str, type(u''))) and not value

    def clear_empties(node):
        # Recurse first, so that containers which become empty once their own
        # empty children are removed get dropped as well.
        if isinstance(node, dict):
            # Iterate over a snapshot of the keys; the dict shrinks as we go.
            for key in list(node):
                clear_empties(node[key])
                if is_empty(node[key]):
                    del node[key]
        elif isinstance(node, list):
            for item in node:
                clear_empties(item)
            # Rebuild in place instead of calling remove() during iteration,
            # which skips the element following every removed one.
            node[:] = [item for item in node if not is_empty(item)]

    def add_error(node, path, message):
        # Work on a copy so that the path on the error object stays intact.
        keys = list(path) or [u'__global']

        for key in keys[:-1]:
            if key not in node:
                node[key] = {}
            elif isinstance(node[key], list):
                # Messages were already recorded for this node itself; keep
                # them under '__global' so that we can descend below it.
                node[key] = {u'__global': node[key]}
            node = node[key]

        leaf = keys[-1]
        if isinstance(node.get(leaf), dict):
            # Deeper errors already exist below this node; record this
            # message as belonging to the node itself.
            node[leaf].setdefault(u'__global', []).append(message)
        else:
            node.setdefault(leaf, []).append(message)

    errors = {}
    validator = create_validator(schema)
    clear_empties(instance)
    for error in validator.iter_errors(instance):
        add_error(errors, error.path, error.message)

    return errors


def check_schema(schema):
cls = jsonschema.validators.validator_for(schema)
cls.check_schema(schema)
Expand All @@ -37,7 +75,7 @@ def create_validator(schema):
return cls(schema, format_checker=jsonschema.FormatChecker(
formats=[
'doi',
'uri',
'uri', # implemented in jsonschema._format.py; requires rfc3987
'url',
'year',
'yearmonth',
Expand All @@ -53,14 +91,14 @@ def create_validator(schema):


def vocabulary_validator(validator, vocabulary_name, instance, schema):
if validator.is_type(instance, "string"):
if validator.is_type(instance, 'string'):
try:
vocabulary = tk.get_action('vocabulary_show')(data_dict={'id': vocabulary_name})
tags = [tag['name'] for tag in vocabulary['tags']]
if instance not in tags:
yield jsonschema.ValidationError(_('Tag not found in vocabulary'))
except tk.ObjectNotFound:
yield jsonschema.ValidationError('%s: %s' % (_('Not found'), _('Vocabulary')))
yield jsonschema.ValidationError("%s: %s '%s'" % (_('Not found'), _('Vocabulary'), vocabulary_name))


@checks_format('doi')
Expand Down Expand Up @@ -105,7 +143,6 @@ def is_yearmonth(instance):
return False


# TODO: check if this replaces or adds to the existing date format checkers
@checks_format('date')
def is_date(instance):
if not isinstance(instance, basestring):
Expand All @@ -127,7 +164,12 @@ def is_datetime(instance):
time_match = re.match(TIME_RE, timestr)
if time_match:
h, m, s, tzh, tzm = time_match.group('h', 'm', 's', 'tzh', 'tzm')
if 0 <= int(h) <= 23 and 0 <= int(m) <= 59 and 0 <= int(s) <= 59 and 0 <= int(tzm) <= 59:
if (
0 <= int(h) <= 23 and
0 <= int(m) <= 59 and
0 <= int(s) <= 59 and
0 <= int(tzm) <= 59
):
return True
return False
except ValueError:
Expand Down Expand Up @@ -175,7 +217,10 @@ def is_geolocation_point(instance):
match = re.match(GEO_POINT_RE, instance)
if match:
lat, lon = match.group('lat', 'lon')
if -90 <= float(lat) <= 90 and -180 <= float(lon) <= 180:
if (
-90 <= float(lat) <= 90 and
-180 <= float(lon) <= 180
):
return True
return False

Expand All @@ -187,7 +232,13 @@ def is_geolocation_box(instance):
match = re.match(GEO_BOX_RE, instance)
if match:
lat1, lon1, lat2, lon2 = match.group('lat1', 'lon1', 'lat2', 'lon2')
if -90 <= float(lat1) <= 90 and -180 <= float(lon1) <= 180 and -90 <= float(lat2) <= 90 and -180 <= float(lon2) <= 180 and \
float(lat1) <= float(lat2) and float(lon1) <= float(lon2):
if (
-90 <= float(lat1) <= 90 and
-180 <= float(lon1) <= 180 and
-90 <= float(lat2) <= 90 and
-180 <= float(lon2) <= 180 and
float(lat1) <= float(lat2) and
float(lon1) <= float(lon2)
):
return True
return False
6 changes: 1 addition & 5 deletions ckanext/metadata/logic/action/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,11 +568,7 @@ def metadata_validity_check(context, data_dict):
metadata_json = json.loads(data['metadata_json'])
model_json = json.loads(data['model_json'])

metadata_errors = {}
validator = jsonschema_validation.create_validator(model_json)
for metadata_error in validator.iter_errors(metadata_json):
metadata_errors[tuple(metadata_error.path)] = metadata_error.message

metadata_errors = jsonschema_validation.validate(metadata_json, model_json)
return metadata_errors


Expand Down
12 changes: 12 additions & 0 deletions ckanext/metadata/tests/test_metadata_model_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,18 @@ def test_update_valid_partial(self):
assert obj.title == metadata_model['title']
assert obj.description == metadata_model['description']

def test_update_valid_datacite(self):
    """Update a metadata model using the SAEON DataCite example as its model JSON.

    Runs the 'metadata_model_update' action through the _test_action helper
    and checks the returned object against the input dict.
    """
    existing_model = ckanext_factories.MetadataModel()
    datacite_model_json = load_example('saeon_datacite_model.json')

    input_dict = {
        'id': existing_model['id'],
        'metadata_schema_id': existing_model['metadata_schema_id'],
        'organization_id': '',
        'infrastructure_id': '',
        'model_json': datacite_model_json,
    }

    # Only the returned object is inspected; the action result is not needed here.
    _, updated_obj = self._test_action('metadata_model_update', **input_dict)
    assert_object_matches_dict(updated_obj, input_dict)

def test_update_valid_set_organization(self):
metadata_model = ckanext_factories.MetadataModel()
organization = ckan_factories.Organization()
Expand Down
18 changes: 12 additions & 6 deletions ckanext/metadata/tests/test_metadata_record_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,9 +483,15 @@ def test_invalidate(self):
self._assert_metadata_record_has_validation_models(metadata_record['id'], metadata_model['name'])
self._assert_invalidate_activity_logged(metadata_record['id'], None, None)

# def test_validate_datacite(self):
# metadata_record = self._generate_metadata_record(
# metadata_json=load_example('saeon_datacite_record.json'))
# metadata_model = ckanext_factories.MetadataModel(
# metadata_schema_id=metadata_record['metadata_schema_id'],
# model_json=load_example('saeon_datacite_model.json'))
def test_validate_datacite(self):
    """Run 'metadata_record_validate' on a record built from the SAEON DataCite example.

    The record and the validation model are both created from the bundled
    example JSON files, and the model shares the record's metadata schema.
    """
    record = self._generate_metadata_record(
        metadata_json=load_example('saeon_datacite_record.json'))
    record_id = record['id']
    model = ckanext_factories.MetadataModel(
        metadata_schema_id=record['metadata_schema_id'],
        model_json=load_example('saeon_datacite_model.json'))

    # The model must be associated with the record before validation is run.
    self._assert_metadata_record_has_validation_models(record_id, model['name'])

    self._test_action('metadata_record_validate', id=record_id)
    assert_package_has_extra(record_id, 'validated', True)
    # TODO: finalise exact structure of DataCite model; currently the sample record does not validate
    # self._assert_validate_activity_logged(record_id, model)

0 comments on commit 026b6df

Please sign in to comment.