From fe01816edcdcdf44e89e036b0c8806520413ba07 Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Thu, 29 Jun 2017 14:56:25 +0100 Subject: [PATCH] schema and yaml: correct bugs in schema and use safe YAML load/dump * schema: rename some "id" values, and fix a major bug where the value of "items" was given as an array of one JSON schema, thereby only validating the first element of a list instead of *all* elements in the list. * testsuite: fix test data to match newly corrected schema. * yaml: always use SafeLoader and SafeDumper or preferably the corresponding LibYAML classes if installed. * yaml: simplify validation of parser errors by catching a generic Exception rather than yaml.parser.ParserError and IOError, also now implicitly catching a yaml.constructor.ConstructorError caused by the presence of "!!python/unicode" tags. * version: bump to 0.1.16. Signed-off-by: Graeme Watt --- hepdata_validator/data_file_validator.py | 43 +--- .../schemas/additional_info_schema.json | 224 +++++++++--------- hepdata_validator/schemas/data_schema.json | 4 +- .../schemas/submission_schema.json | 196 ++++++++------- .../submission_file_validator.py | 22 +- hepdata_validator/version.py | 2 +- .../test_data/valid_submission_license.yaml | 6 +- 7 files changed, 230 insertions(+), 267 deletions(-) diff --git a/hepdata_validator/data_file_validator.py b/hepdata_validator/data_file_validator.py index 2469c5d..daea2e5 100644 --- a/hepdata_validator/data_file_validator.py +++ b/hepdata_validator/data_file_validator.py @@ -26,8 +26,12 @@ import os import yaml -from yaml.scanner import ScannerError -from yaml.parser import ParserError + +# We try to load using the CSafeLoader for speed improvements. +try: + from yaml import CSafeLoader as Loader +except ImportError: #pragma: no cover + from yaml import SafeLoader as Loader #pragma: no cover from hepdata_validator import Validator, ValidationMessage from jsonschema import validate as json_validate, ValidationError @@ -89,36 +93,11 @@ def validate(self, **kwargs): if data is None: try: - # We try to load using the CLoader for speed improvements. - try: - data = yaml.load(open(file_path, 'r'), Loader=yaml.CLoader) - except ScannerError as se: - self.add_validation_message(ValidationMessage(file=file_path, message= - 'There was a problem parsing the file.\n' + str(se))) - return False - except ParserError as pe: - self.add_validation_message(ValidationMessage(file=file_path, message= - 'There was a problem parsing the file.\n' + pe.__str__())) - return False - except IOError as ioe: - self.add_validation_message(ValidationMessage(file=file_path, message= - 'There was a problem parsing the file.\n' + ioe.__str__())) - return False - except: #pragma: no cover - try: # pragma: no cover - data = yaml.load(open(file_path, 'r')) # pragma: no cover - except ScannerError as se: # pragma: no cover - self.add_validation_message(ValidationMessage(file=file_path, message= - 'There was a problem parsing the file.\n' + str(se))) # pragma: no cover - return False - except ParserError as pe: # pragma: no cover - self.add_validation_message(ValidationMessage(file=file_path, message= - 'There was a problem parsing the file.\n' + pe.__str__())) - return False - except IOError as ioe: - self.add_validation_message(ValidationMessage(file=file_path, message= - 'There was a problem parsing the file.\n' + ioe.__str__())) - return False + data = yaml.load(open(file_path, 'r'), Loader=Loader) + except Exception as e: + self.add_validation_message(ValidationMessage(file=file_path, message= + 'There was a problem parsing the file.\n' + e.__str__())) + return False try: diff --git a/hepdata_validator/schemas/additional_info_schema.json b/hepdata_validator/schemas/additional_info_schema.json index dda7b22..edf8522 100644 --- a/hepdata_validator/schemas/additional_info_schema.json +++ b/hepdata_validator/schemas/additional_info_schema.json @@ -9,32 +9,30 @@ "record_ids": { "id": "http://jsonschema.net/record_ids", "type": "array", - "items": [ - { - "id": "http://jsonschema.net/record_ids/record_id", - "type": "object", - "properties": { - "type": { - "id": "http://jsonschema.net/record_ids/record_id/type", - "type": [ - "string" - ] - }, - "id": { - "id": "http://jsonschema.net/record_ids/record_id/id", - "type": [ - "string", - "number" - ] - } + "items": { + "id": "http://jsonschema.net/record_ids/record_id", + "type": "object", + "properties": { + "type": { + "id": "http://jsonschema.net/record_ids/record_id/type", + "type": [ + "string" + ] }, - "additionalProperties": false, - "required": [ - "type", - "id" - ] - } - ] + "id": { + "id": "http://jsonschema.net/record_ids/record_id/id", + "type": [ + "string", + "number" + ] + } + }, + "additionalProperties": false, + "required": [ + "type", + "id" + ] + } }, "preprintyear": { "id": "http://hepdata.org/submission/schema/data/preprintyear", @@ -60,109 +58,103 @@ "modifications": { "id": "http://jsonschema.net/modifications", "type": "array", - "items": [ - { - "id": "http://jsonschema.net/modifications/modification", - "type": "object", - "properties": { - "action": { - "id": "http://jsonschema.net/modifications/modification/action", - "type": "string" - }, - "who": { - "id": "http://jsonschema.net/modifications/modification/who", - "type": "string" - } + "items": { + "id": "http://jsonschema.net/modifications/modification", + "type": "object", + "properties": { + "action": { + "id": "http://jsonschema.net/modifications/modification/action", + "type": "string" }, - "additionalProperties": true, - "required": [ - "action", - "who" - ] - } - ] + "who": { + "id": "http://jsonschema.net/modifications/modification/who", + "type": "string" + } + }, + "additionalProperties": true, + "required": [ + "action", + "who" + ] + } }, "additional_resources": { "id": "http://jsonschema.net/additional_resources", "type": "array", - "items": [ - { - "id": "http://jsonschema.net/additional_resources/resource", - "type": "object", - "properties": { - "location": { - "id": "http://jsonschema.net/additional_resources/resource/location", - "type": "string" - }, - "description": { - "id": "http://jsonschema.net/additional_resources/resource/description", - "type": "string" - }, - "license": { - "id": "http://jsonschema.net/additional_resources/resource/license", - "type": "object", - "properties": { - "name": { - "id": "http://jsonschema.net/additional_resources/resource/license/name", - "type": "string" - }, - "url": { - "id": "http://jsonschema.net/additional_resources/resource/license/url", - "type": "string" - }, - "description": { - "id": "http://jsonschema.net/additional_resources/resource/license/description", - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name", - "url" - ] - } + "items": { + "id": "http://jsonschema.net/additional_resources/resource", + "type": "object", + "properties": { + "location": { + "id": "http://jsonschema.net/additional_resources/resource/location", + "type": "string" + }, + "description": { + "id": "http://jsonschema.net/additional_resources/resource/description", + "type": "string" }, - "required": [ - "location" - ] - } - ] + "license": { + "id": "http://jsonschema.net/additional_resources/resource/license", + "type": "object", + "properties": { + "name": { + "id": "http://jsonschema.net/additional_resources/resource/license/name", + "type": "string" + }, + "url": { + "id": "http://jsonschema.net/additional_resources/resource/license/url", + "type": "string" + }, + "description": { + "id": "http://jsonschema.net/additional_resources/resource/license/description", + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "name", + "url" + ] + } + }, + "required": [ + "location" + ] + } }, "associated_records": { "id": "http://jsonschema.net/associated_records", "type": "array", "description": "Links to other HEPData Submissions or INSPIRE Records that relate to this submission.", - "items": [ - { - "id": "http://jsonschema.net/associated_records/output", - "type": "object", - "properties": { - "type": { - "id": "http://jsonschema.net/associated_records/output/type", - "type": "string" - }, - "identifier": { - "id": "http://jsonschema.net/associated_records/output/identifier", - "type": [ - "string", - "number" - ] - }, - "description": { - "id": "http://jsonschema.net/associated_records/output/description", - "type": "string" - }, - "url": { - "id": "http://jsonschema.net/associated_records/output/url", - "type": "string" - } + "items": { + "id": "http://jsonschema.net/associated_records/output", + "type": "object", + "properties": { + "type": { + "id": "http://jsonschema.net/associated_records/output/type", + "type": "string" + }, + "identifier": { + "id": "http://jsonschema.net/associated_records/output/identifier", + "type": [ + "string", + "number" + ] + }, + "description": { + "id": "http://jsonschema.net/associated_records/output/description", + "type": "string" }, - "required": [ - "identifier", - "type" - ] - } - ] + "url": { + "id": "http://jsonschema.net/associated_records/output/url", + "type": "string" + } + }, + "required": [ + "identifier", + "type" + ] + } }, "comment": { "id": "http://hepdata.org/submission/schema/additional_info/comment", diff --git a/hepdata_validator/schemas/data_schema.json b/hepdata_validator/schemas/data_schema.json index 9ba80aa..37d3380 100644 --- a/hepdata_validator/schemas/data_schema.json +++ b/hepdata_validator/schemas/data_schema.json @@ -47,11 +47,11 @@ ] }, "low": { - "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value", + "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/low", "type": "number" }, "high": { - "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value", + "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/high", "type": "number" } }, diff --git a/hepdata_validator/schemas/submission_schema.json b/hepdata_validator/schemas/submission_schema.json index c752947..49c16bb 100644 --- a/hepdata_validator/schemas/submission_schema.json +++ b/hepdata_validator/schemas/submission_schema.json @@ -18,33 +18,31 @@ "keywords": { "id": "http://jsonschema.net/keywords", "type": "array", - "items": [ - { - "id": "http://jsonschema.net/additional_resources/keywords/keyword", - "type": "object", - "properties": { - "name": { - "id": "http://jsonschema.net/keywords/name", - "type": "string" - }, - "values": { - "id": "http://jsonschema.net/keywords/values", - "type": "array", - "items": { - "id": "http://jsonschema.net/keywords/values/value", - "type": [ - "string", - "number" - ] - } - } + "items": { + "id": "http://jsonschema.net/keywords/keyword", + "type": "object", + "properties": { + "name": { + "id": "http://jsonschema.net/keywords/keyword/name", + "type": "string" }, - "required": [ - "name", - "values" - ] - } - ] + "values": { + "id": "http://jsonschema.net/keywords/keyword/values", + "type": "array", + "items": { + "id": "http://jsonschema.net/keywords/keyword/values/value", + "type": [ + "string", + "number" + ] + } + } + }, + "required": [ + "name", + "values" + ] + } }, "data_file": { "id": "http://jsonschema.net/data_file", @@ -83,87 +81,83 @@ "additional_resources": { "id": "http://jsonschema.net/additional_resources", "type": "array", - "items": [ - { - "id": "http://jsonschema.net/additional_resources/resource", - "type": "object", - "properties": { - "location": { - "id": "http://jsonschema.net/additional_resources/resource/location", - "type": "string" - }, - "description": { - "id": "http://jsonschema.net/additional_resources/resource/description", - "type": "string" - }, - "type": { - "id": "http://jsonschema.net/additional_resources/resource/type", - "type": "string" - }, - "license": { - "id": "http://jsonschema.net/additional_resources/resource/license", - "type": "object", - "properties": { - "name": { - "id": "http://jsonschema.net/additional_resources/resource/license/name", - "type": "string" - }, - "url": { - "id": "http://jsonschema.net/additional_resources/resource/license/url", - "type": "string" - }, - "description": { - "id": "http://jsonschema.net/additional_resources/resource/license/description", - "type": "string" - } - }, - "required": [ - "name", - "url" - ] - } + "items": { + "id": "http://jsonschema.net/additional_resources/resource", + "type": "object", + "properties": { + "location": { + "id": "http://jsonschema.net/additional_resources/resource/location", + "type": "string" }, - "required": [ - "location" - ] - } - ] + "description": { + "id": "http://jsonschema.net/additional_resources/resource/description", + "type": "string" + }, + "type": { + "id": "http://jsonschema.net/additional_resources/resource/type", + "type": "string" + }, + "license": { + "id": "http://jsonschema.net/additional_resources/resource/license", + "type": "object", + "properties": { + "name": { + "id": "http://jsonschema.net/additional_resources/resource/license/name", + "type": "string" + }, + "url": { + "id": "http://jsonschema.net/additional_resources/resource/license/url", + "type": "string" + }, + "description": { + "id": "http://jsonschema.net/additional_resources/resource/license/description", + "type": "string" + } + }, + "required": [ + "name", + "url" + ] + } + }, + "required": [ + "location" + ] + } }, "associated_records": { "id": "http://jsonschema.net/associated_records", "type": "array", "description": "Links to other HEPData Submissions or INSPIRE Records that relate to this submission.", - "items": [ - { - "id": "http://jsonschema.net/associated_records/output", - "type": "object", - "properties": { - "type": { - "id": "http://jsonschema.net/associated_records/output/type", - "type": "string" - }, - "identifier": { - "id": "http://jsonschema.net/associated_records/output/identifier", - "type": [ - "string", - "number" - ] - }, - "description": { - "id": "http://jsonschema.net/associated_records/output/description", - "type": "string" - }, - "url": { - "id": "http://jsonschema.net/associated_records/output/url", - "type": "string" - } + "items": { + "id": "http://jsonschema.net/associated_records/output", + "type": "object", + "properties": { + "type": { + "id": "http://jsonschema.net/associated_records/output/type", + "type": "string" }, - "required": [ - "identifier", - "type" - ] - } - ] + "identifier": { + "id": "http://jsonschema.net/associated_records/output/identifier", + "type": [ + "string", + "number" + ] + }, + "description": { + "id": "http://jsonschema.net/associated_records/output/description", + "type": "string" + }, + "url": { + "id": "http://jsonschema.net/associated_records/output/url", + "type": "string" + } + }, + "required": [ + "identifier", + "type" + ] + } } }, "required": [ @@ -172,4 +166,4 @@ "keywords", "data_file" ] -} \ No newline at end of file +} diff --git a/hepdata_validator/submission_file_validator.py b/hepdata_validator/submission_file_validator.py index 3a6264f..f0dc959 100644 --- a/hepdata_validator/submission_file_validator.py +++ b/hepdata_validator/submission_file_validator.py @@ -3,7 +3,13 @@ import os import yaml from yaml.scanner import ScannerError -from yaml.parser import ParserError + +# We try to load using the CSafeLoader for speed improvements. +try: + from yaml import CSafeLoader as Loader +except ImportError: #pragma: no cover + from yaml import SafeLoader as Loader #pragma: no cover + from hepdata_validator import Validator, ValidationMessage __author__ = 'eamonnmaguire' @@ -39,11 +45,7 @@ def validate(self, **kwargs): raise LookupError("file_path argument must be supplied") if data is None: - try: - # We try to load using the CLoader for speed improvements. - data = yaml.load_all(open(file_path, 'r'), Loader=yaml.CLoader) - except Exception as e: #pragma: no cover - data = yaml.load_all(open(file_path, 'r')) #pragma: no cover + data = yaml.load_all(open(file_path, 'r'), Loader=Loader) for data_item in data: if data_item is None: @@ -73,10 +75,6 @@ def validate(self, **kwargs): 'Diagnostic information follows.\n' + str(se))) return False - except ParserError as pe: - self.add_validation_message(ValidationMessage(file=file_path, message=pe.__str__())) - return False - - except IOError as ioe: - self.add_validation_message(ValidationMessage(file=file_path, message=ioe.__str__())) + except Exception as e: + self.add_validation_message(ValidationMessage(file=file_path, message=e.__str__())) return False \ No newline at end of file diff --git a/hepdata_validator/version.py b/hepdata_validator/version.py index 14968ab..ff0feae 100644 --- a/hepdata_validator/version.py +++ b/hepdata_validator/version.py @@ -27,4 +27,4 @@ from __future__ import absolute_import, print_function -__version__ = "0.1.15" +__version__ = "0.1.16" diff --git a/testsuite/test_data/valid_submission_license.yaml b/testsuite/test_data/valid_submission_license.yaml index 7617643..90052d2 100644 --- a/testsuite/test_data/valid_submission_license.yaml +++ b/testsuite/test_data/valid_submission_license.yaml @@ -7,8 +7,8 @@ additional_resources: description: "Full source code for creating this data" license: name: 'GPL 2' - location: 'somewhere over the rainbow...' - blah: blah + url: 'somewhere over the rainbow...' + description: blah modifications: # what, by whom and when encoded or changed - {action: 'Encoded', date: '27 FEB 2014', who: 'Graeme Watt'} @@ -58,7 +58,7 @@ additional_resources: description: "Full source code for creating this data" license: name: 'GPL 2' - + url: 'some url' --- # This is Table 2. name: "Table 2"