Skip to content

Commit

Permalink
Add checks for zero uncertainty and length of values list in data schema
Browse files Browse the repository at this point in the history
* Check that uncertainties are not all zero in data schema (closes #7).
* Check that length of 'values' list is consistent in data schema.
* Add tests for all zero uncertainties and inconsistent 'values' list.
* Catch LookupError in 'validate' method of SubmissionFileValidator.
* Rename 'valid_data_with_percent.yaml' file and remove invalid fields.
* Change 'error' to 'uncertainty' in tests since 'error' is ambiguous.
* Add 2nd value for independent variable in 'valid_file.yaml' (& json).
* Correct mistake in README.rst, add print_errors in last usage example.

Signed-off-by: Graeme Watt <graeme.watt@durham.ac.uk>
  • Loading branch information
GraemeWatt committed Feb 25, 2020
1 parent 7148b62 commit 293eec0
Show file tree
Hide file tree
Showing 10 changed files with 243 additions and 20 deletions.
4 changes: 3 additions & 1 deletion README.rst
Expand Up @@ -103,13 +103,15 @@ for the error message lookup map.
from hepdata_validator.data_file_validator import DataFileValidator
import yaml
file = yaml.load(open('data.yaml', 'r'))
file_contents = yaml.load(open('data.yaml', 'r'))
data_file_validator = DataFileValidator()
data_file_validator.validate(file_path='data.yaml', data=file_contents)
data_file_validator.get_messages('data.yaml')
data_file_validator.print_errors('data.yaml')
An example `offline validation script <https://github.com/HEPData/hepdata-submission/blob/master/scripts/check.py>`_
uses the ``hepdata_validator`` package to validate the ``submission.yaml`` file and all YAML data files of a
Expand Down
64 changes: 64 additions & 0 deletions hepdata_validator/data_file_validator.py
Expand Up @@ -121,6 +121,9 @@ def validate(self, **kwargs):
json_validate(data, custom_schema)
else:
json_validate(data, default_data_schema)
if self.schema_version != '0.1.0':
check_for_zero_uncertainty(data)
check_length_values(data)

except ValidationError as ve:

Expand All @@ -143,3 +146,64 @@ def __init__(self, message=''):

def __unicode__(self):
return self.message


def _uncertainty_as_number(raw):
    """
    Convert a single uncertainty component to a float where possible.
    :param raw: a 'symerror', 'plus' or 'minus' entry (number or string, optionally containing '%')
    :return: float if ``raw`` is a parsable string; the string with '%' removed if it is
             not parsable; ``raw`` unchanged if it is not a string
    """
    if not isinstance(raw, str):
        return raw
    stripped = raw.replace('%', '')
    try:
        return float(stripped)
    except ValueError:
        # e.g. an empty string: keep it, it will simply never compare equal to zero
        return stripped


def check_for_zero_uncertainty(data):
    """
    Check that uncertainties are not all zero.
    The check is applied separately to every entry of 'values' of every
    dependent variable that provides an 'errors' list.
    :param data: data table in YAML format
    :return: raise ValidationError if uncertainties are all zero
    """
    for dependent_variable in data['dependent_variables']:
        if 'values' not in dependent_variable:
            continue

        for value in dependent_variable['values']:
            if 'errors' not in value:
                continue

            zero_uncertainties = []
            for error in value['errors']:

                if 'symerror' in error:
                    error_plus = error_minus = error['symerror']
                elif 'asymerror' in error:
                    error_plus = error['asymerror']['plus']
                    error_minus = error['asymerror']['minus']
                else:
                    # No magnitude given: skip the entry instead of hitting unbound
                    # (or stale, from the previous entry) error_plus/error_minus.
                    continue

                error_plus = _uncertainty_as_number(error_plus)
                error_minus = _uncertainty_as_number(error_minus)
                zero_uncertainties.append(error_plus == 0 and error_minus == 0)

            # all([]) is True, so require at least one uncertainty before
            # declaring that they are all zero.
            if zero_uncertainties and all(zero_uncertainties):
                raise ValidationError('Uncertainties should not all be zero', instance=value)


def check_length_values(data):
    """
    Verify that every variable in the table carries the same number of points.
    The length of the 'values' list is collected for each entry of
    independent_variables and dependent_variables and all must agree.
    :param data: data table in YAML format
    :return: raise ValidationError if the lengths are not all identical
    """
    lengths = {}
    for section in ('independent_variables', 'dependent_variables'):
        lengths[section] = [len(variable['values']) for variable in data[section]]

    distinct_lengths = set(lengths['independent_variables'])
    distinct_lengths.update(lengths['dependent_variables'])
    if len(distinct_lengths) <= 1:
        # zero or one unique length: nothing to report
        return

    details = "independent_variables%s, dependent_variables%s" % (
        str(lengths['independent_variables']),
        str(lengths['dependent_variables']),
    )
    raise ValidationError("Inconsistent length of 'values' list: " + details,
                          instance=data)
3 changes: 3 additions & 0 deletions hepdata_validator/submission_file_validator.py
Expand Up @@ -84,6 +84,9 @@ def validate(self, **kwargs):
if not self.has_errors(file_path):
return_value = True

except LookupError as le:
raise le

except ScannerError as se: # pragma: no cover
self.add_validation_message( # pragma: no cover
ValidationMessage(file=file_path, message=
Expand Down
20 changes: 20 additions & 0 deletions testsuite/test_data/file_with_inconsistent_values.yaml
@@ -0,0 +1,20 @@
---
independent_variables:
- header: {name: SQRT(S), units: GEV}
values:
- value: 7000
dependent_variables:
- header: {name: SIG(total), units: FB}
qualifiers:
- {name: RE, value: P P --> Z0 Z0 X}
values:
- value: 6.7
errors:
- {symerror: 0.45, label: stat}
- {asymerror: {plus: 0.4, minus: -0.3}, label: sys}
- {symerror: 0.34, label: "sys,lumi"}
- value: 5.7
errors:
- {symerror: 0.4, label: stat}
- {asymerror: {plus: 0.42, minus: 0.31}, label: sys}
- {symerror: 0.4, label: "sys,lumi"}
21 changes: 21 additions & 0 deletions testsuite/test_data/file_with_zero_uncertainty.yaml
@@ -0,0 +1,21 @@
---
independent_variables:
- header: {name: SQRT(S), units: GEV}
values:
- value: 7000
- value: 8000
dependent_variables:
- header: {name: SIG(total), units: FB}
qualifiers:
- {name: RE, value: P P --> Z0 Z0 X}
values:
- value: 6.7
errors:
- {symerror: 0.45, label: stat}
- {asymerror: {plus: 0.4, minus: -0.3}, label: sys}
- {symerror: 0.34, label: "sys,lumi"}
- value: 5.7
errors:
- {symerror: 0.0, label: stat}
- {asymerror: {plus: 0.0, minus: 0.0}, label: sys}
- {symerror: 0.0, label: "sys,lumi"}
@@ -1,14 +1,4 @@

---
name: 'Table 1'
label: 'Data from Table 4'
description: |
Measured double-differential dijet cross sections for the range 0.0 &lt;= y* &lt; 0.5 and jet radius parameter R = 0.4. The statistical uncertainties from data and MC simulation have been combined. The three columns correspond to nominal, stronger or weaker correlations between jet energy scale uncertainty components
keywords:
- {name: reactions, values: ['P P --> JET JET X']}
- {name: observables, values: ['D2SIG/DM/DYRAP']}
- {name: cmenergies, values: [7000.0]}
additional_resources:
independent_variables:
- header: {name: 'M(2JET)', units: 'TEV'}
values:
Expand Down Expand Up @@ -49,7 +39,7 @@ dependent_variables:
- value: 777000.0
errors:
- {symerror: 0.73%, label: 'stat'}
- {asymerror: {plus: 3.1%, minus: -3.0%}, label: 'sys'}
- {asymerror: {plus: '', minus: -3.0%}, label: 'sys'} # empty string for 'plus' component
- {symerror: 0.4%, label: 'sys'}
- {symerror: 0.1%, label: 'sys'}
- {symerror: 0.2%, label: 'sys'}
Expand All @@ -61,7 +51,7 @@ dependent_variables:
- {symerror: 1.0%, label: 'sys'}
- {symerror: 0.3%, label: 'sys'}
- {symerror: 0.8%, label: 'sys'}
- {asymerror: {plus: 0.5%, minus: -0.6%}, label: 'sys'}
- {asymerror: {plus: 0.5%, minus: ''}, label: 'sys'} # empty string for 'minus' component
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
Expand Down
89 changes: 89 additions & 0 deletions testsuite/test_data/valid_data_with_zero_percent.yaml
@@ -0,0 +1,89 @@
---
independent_variables:
- header: {name: 'M(2JET)', units: 'TEV'}
values:
- {low: 0.26, high: 0.31}
dependent_variables:
- header: {name: 'D2(SIG)/DM(2JET)/DYRAP*', units: 'PB*TEV*-1'}
qualifiers:
- {name: SQRT(S), value: '7000.0', units: 'GeV'}
- {name: YRAP*, value: '0.0-0.5'}
- {name: R, value: '0.4'}
- {name: RE, value: 'P P --> JET JET X'}
- {name: PT(JET1), value: '> 100', units: 'GEV'}
- {name: PT(JET2), value: '> 50', units: 'GEV'}
- {name: ABS(YRAP(JET1)), value: '< 3.0'}
- {name: ABS(YRAP(JET2)), value: '< 3.0'}
- {name: Correlation assumptions, value: 'Nominal'}
values:
- value: 777000.0
errors:
- {symerror: 0.0%, label: 'stat'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: -0.0%, minus: 0.0%}, label: 'sys'}
- {asymerror: {plus: -0.0%, minus: 0.0%}, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
- {symerror: 0.0%, label: 'sys'}
3 changes: 3 additions & 0 deletions testsuite/test_data/valid_file.json
Expand Up @@ -9,6 +9,9 @@
{
"high": 7000,
"low": 6500
},
{
"value": 8000
}
]
}
Expand Down
1 change: 1 addition & 0 deletions testsuite/test_data/valid_file.yaml
Expand Up @@ -3,6 +3,7 @@ independent_variables:
- header: {name: SQRT(S), units: GEV}
values:
- value: 7000
- value: 8000
dependent_variables:
- header: {name: SIG(total), units: FB}
qualifiers:
Expand Down
44 changes: 37 additions & 7 deletions testsuite/validation_test.py
Expand Up @@ -193,9 +193,9 @@ def setUp(self):
self.base_dir,
'test_data/invalid_file.json')

self.valid_file_error_percent_yaml = os.path.join(
self.valid_file_uncertainty_percent_yaml = os.path.join(
self.base_dir,
'test_data/valid_data_with_error.yaml'
'test_data/valid_data_with_percent.yaml'
)

self.invalid_syntax_data_file = os.path.join(
Expand All @@ -212,6 +212,18 @@ def setUp(self):
self.base_dir,
'test_data/valid_file_custom.yaml')

self.file_with_zero_uncertainty = os.path.join(
self.base_dir,
'test_data/file_with_zero_uncertainty.yaml')

self.file_with_zero_percent = os.path.join(
self.base_dir,
'test_data/valid_data_with_zero_percent.yaml')

self.file_with_inconsistent_values = os.path.join(
self.base_dir,
'test_data/file_with_inconsistent_values.yaml')

def test_no_file_path_supplied(self):
try:
self.validator.validate(file_path=None)
Expand All @@ -231,11 +243,11 @@ def test_invalid_yaml_file(self):

self.validator.print_errors(self.invalid_file_yaml)

def test_valid_file_with_percent_errors(self):
print('___DATA_VALIDATION: Testing valid yaml percent error ___')
self.assertEqual(self.validator.validate(file_path=self.valid_file_error_percent_yaml),
False)
self.validator.print_errors(self.valid_file_error_percent_yaml)
def test_valid_file_with_percent_uncertainty(self):
print('___DATA_VALIDATION: Testing valid yaml percent uncertainty ___')
is_valid = self.validator.validate(file_path=self.valid_file_uncertainty_percent_yaml)
self.validator.print_errors(self.valid_file_uncertainty_percent_yaml)
self.assertEqual(is_valid, True)

def test_valid_json_file(self):
print('___DATA_VALIDATION: Testing valid json submission___')
Expand Down Expand Up @@ -319,6 +331,24 @@ def test_ioerror_yaml_file(self):

self.validator.print_errors(self.valid_file_yaml[:-1])

# Validate the 'file_with_zero_uncertainty' fixture and expect validate() to
# return False; the fixture's second data point lists only zero uncertainties.
def test_file_with_zero_uncertainty(self):
print('___DATA_VALIDATION: Testing file with zero uncertainty___')
self.assertEqual(self.validator.validate(file_path=self.file_with_zero_uncertainty), False)

# Print the messages collected for this file so the reported error is visible in the test output.
self.validator.print_errors(self.file_with_zero_uncertainty)

# Validate the 'valid_data_with_zero_percent.yaml' fixture, in which every
# uncertainty is written as a zero percentage string, and expect validate()
# to return False.
def test_file_with_zero_percent(self):
print('___DATA_VALIDATION: Testing file with zero percent uncertainty___')
self.assertEqual(self.validator.validate(file_path=self.file_with_zero_percent), False)

# Print the messages collected for this file so the reported error is visible in the test output.
self.validator.print_errors(self.file_with_zero_percent)

# Validate the 'file_with_inconsistent_values' fixture (one independent value
# but two dependent values) and expect validate() to return False.
def test_file_with_inconsistent_values(self):
print('___DATA_VALIDATION: Testing file with inconsistent values list___')
self.assertEqual(self.validator.validate(file_path=self.file_with_inconsistent_values), False)

# Print the messages collected for this file so the reported error is visible in the test output.
self.validator.print_errors(self.file_with_inconsistent_values)


if __name__ == '__main__':
unittest.main()

0 comments on commit 293eec0

Please sign in to comment.