diff --git a/README.rst b/README.rst index 15fc643..b6b81fd 100644 --- a/README.rst +++ b/README.rst @@ -103,13 +103,15 @@ for the error message lookup map. from hepdata_validator.data_file_validator import DataFileValidator import yaml - file = yaml.load(open('data.yaml', 'r')) + file_contents = yaml.load(open('data.yaml', 'r')) data_file_validator = DataFileValidator() data_file_validator.validate(file_path='data.yaml', data=file_contents) data_file_validator.get_messages('data.yaml') + data_file_validator.print_errors('data.yaml') + An example `offline validation script `_ uses the ``hepdata_validator`` package to validate the ``submission.yaml`` file and all YAML data files of a diff --git a/hepdata_validator/data_file_validator.py b/hepdata_validator/data_file_validator.py index d666ccb..ce4edf3 100644 --- a/hepdata_validator/data_file_validator.py +++ b/hepdata_validator/data_file_validator.py @@ -121,6 +121,9 @@ def validate(self, **kwargs): json_validate(data, custom_schema) else: json_validate(data, default_data_schema) + if self.schema_version != '0.1.0': + check_for_zero_uncertainty(data) + check_length_values(data) except ValidationError as ve: @@ -143,3 +146,64 @@ def __init__(self, message=''): def __unicode__(self): return self.message + + +def check_for_zero_uncertainty(data): + """ + Check that uncertainties are not all zero. + + :param data: data table in YAML format + :return: raise ValidationError if uncertainties are all zero + """ + for dependent_variable in data['dependent_variables']: + + if 'values' in dependent_variable: + for value in dependent_variable['values']: + + if 'errors' in value: + zero_uncertainties = [] + for error in value['errors']: + + if 'symerror' in error: + error_plus = error_minus = error['symerror'] + elif 'asymerror' in error: + error_plus = error['asymerror']['plus'] + error_minus = error['asymerror']['minus'] + + if isinstance(error_plus, str): + error_plus = error_plus.replace('%', '') + try: + error_plus = float(error_plus) + except ValueError: + pass + + if isinstance(error_minus, str): + error_minus = error_minus.replace('%', '') + try: + error_minus = float(error_minus) + except ValueError: + pass + + if error_plus == 0 and error_minus == 0: + zero_uncertainties.append(True) + else: + zero_uncertainties.append(False) + + if all(zero_uncertainties): + raise ValidationError('Uncertainties should not all be zero', instance=value) + + +def check_length_values(data): + """ + Check that the length of the 'values' list is consistent for + each of the independent_variables and dependent_variables. + + :param data: data table in YAML format + :return: raise ValidationError if inconsistent + """ + indep_count = [len(indep['values']) for indep in data['independent_variables']] + dep_count = [len(dep['values']) for dep in data['dependent_variables']] + if len(set(indep_count + dep_count)) > 1: # if more than one unique count + raise ValidationError("Inconsistent length of 'values' list: " + + "independent_variables%s, dependent_variables%s" % (str(indep_count), str(dep_count)), + instance=data) \ No newline at end of file diff --git a/hepdata_validator/submission_file_validator.py b/hepdata_validator/submission_file_validator.py index eb7e433..8bf70d6 100644 --- a/hepdata_validator/submission_file_validator.py +++ b/hepdata_validator/submission_file_validator.py @@ -84,6 +84,9 @@ def validate(self, **kwargs): if not self.has_errors(file_path): return_value = True + except LookupError as le: + raise le + except ScannerError as se: # pragma: no cover self.add_validation_message( # pragma: no cover ValidationMessage(file=file_path, message= diff --git a/testsuite/test_data/file_with_inconsistent_values.yaml b/testsuite/test_data/file_with_inconsistent_values.yaml new file mode 100644 index 0000000..a37ad10 --- /dev/null +++ b/testsuite/test_data/file_with_inconsistent_values.yaml @@ -0,0 +1,20 @@ +--- +independent_variables: + - header: {name: SQRT(S), units: GEV} + values: + - value: 7000 +dependent_variables: + - header: {name: SIG(total), units: FB} + qualifiers: + - {name: RE, value: P P --> Z0 Z0 X} + values: + - value: 6.7 + errors: + - {symerror: 0.45, label: stat} + - {asymerror: {plus: 0.4, minus: -0.3}, label: sys} + - {symerror: 0.34, label: "sys,lumi"} + - value: 5.7 + errors: + - {symerror: 0.4, label: stat} + - {asymerror: {plus: 0.42, minus: 0.31}, label: sys} + - {symerror: 0.4, label: "sys,lumi"} \ No newline at end of file diff --git a/testsuite/test_data/file_with_zero_uncertainty.yaml b/testsuite/test_data/file_with_zero_uncertainty.yaml new file mode 100644 index 0000000..696a09a --- /dev/null +++ b/testsuite/test_data/file_with_zero_uncertainty.yaml @@ -0,0 +1,21 @@ +--- +independent_variables: + - header: {name: SQRT(S), units: GEV} + values: + - value: 7000 + - value: 8000 +dependent_variables: + - header: {name: SIG(total), units: FB} + qualifiers: + - {name: RE, value: P P --> Z0 Z0 X} + values: + - value: 6.7 + errors: + - {symerror: 0.45, label: stat} + - {asymerror: {plus: 0.4, minus: -0.3}, label: sys} + - {symerror: 0.34, label: "sys,lumi"} + - value: 5.7 + errors: + - {symerror: 0.0, label: stat} + - {asymerror: {plus: 0.0, minus: 0.0}, label: sys} + - {symerror: 0.0, label: "sys,lumi"} \ No newline at end of file diff --git a/testsuite/test_data/valid_data_with_error.yaml b/testsuite/test_data/valid_data_with_percent.yaml similarity index 99% rename from testsuite/test_data/valid_data_with_error.yaml rename to testsuite/test_data/valid_data_with_percent.yaml index 1a73234..bdedd1b 100644 --- a/testsuite/test_data/valid_data_with_error.yaml +++ b/testsuite/test_data/valid_data_with_percent.yaml @@ -1,14 +1,4 @@ - --- -name: 'Table 1' -label: 'Data from Table 4' -description: | - Measured double-differential dijet cross sections for the range 0.0 <= y* < 0.5 and jet radius parameter R = 0.4. The statistical uncertainties from data and MC simulation have been combined. The three columns correspond to nominal, stronger or weaker correlations between jet energy scale uncertainty components -keywords: - - {name: reactions, values: ['P P --> JET JET X']} - - {name: observables, values: ['D2SIG/DM/DYRAP']} - - {name: cmenergies, values: [7000.0]} -additional_resources: independent_variables: - header: {name: 'M(2JET)', units: 'TEV'} values: @@ -49,7 +39,7 @@ dependent_variables: - value: 777000.0 errors: - {symerror: 0.73%, label: 'stat'} - - {asymerror: {plus: 3.1%, minus: -3.0%}, label: 'sys'} + - {asymerror: {plus: '', minus: -3.0%}, label: 'sys'} # empty string for 'plus' component - {symerror: 0.4%, label: 'sys'} - {symerror: 0.1%, label: 'sys'} - {symerror: 0.2%, label: 'sys'} @@ -61,7 +51,7 @@ dependent_variables: - {symerror: 1.0%, label: 'sys'} - {symerror: 0.3%, label: 'sys'} - {symerror: 0.8%, label: 'sys'} - - {asymerror: {plus: 0.5%, minus: -0.6%}, label: 'sys'} + - {asymerror: {plus: 0.5%, minus: ''}, label: 'sys'} # empty string for 'minus' component - {symerror: 0.0%, label: 'sys'} - {symerror: 0.0%, label: 'sys'} - {symerror: 0.0%, label: 'sys'} diff --git a/testsuite/test_data/valid_data_with_zero_percent.yaml b/testsuite/test_data/valid_data_with_zero_percent.yaml new file mode 100644 index 0000000..bc98836 --- /dev/null +++ b/testsuite/test_data/valid_data_with_zero_percent.yaml @@ -0,0 +1,89 @@ +--- +independent_variables: + - header: {name: 'M(2JET)', units: 'TEV'} + values: + - {low: 0.26, high: 0.31} +dependent_variables: + - header: {name: 'D2(SIG)/DM(2JET)/DYRAP*', units: 'PB*TEV*-1'} + qualifiers: + - {name: SQRT(S), value: '7000.0', units: 'GeV'} + - {name: YRAP*, value: '0.0-0.5'} + - {name: R, value: '0.4'} + - {name: RE, value: 'P P --> JET JET X'} + - {name: PT(JET1), value: '> 100', units: 'GEV'} + - {name: PT(JET2), value: '> 50', units: 'GEV'} + - {name: ABS(YRAP(JET1)), value: '< 3.0'} + - {name: ABS(YRAP(JET2)), value: '< 3.0'} + - {name: Correlation assumptions, value: 'Nominal'} + values: + - value: 777000.0 + errors: + - {symerror: 0.0%, label: 'stat'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: -0.0%, minus: 0.0%}, label: 'sys'} + - {asymerror: {plus: -0.0%, minus: 0.0%}, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {asymerror: {plus: 0.0%, minus: -0.0%}, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} + - {symerror: 0.0%, label: 'sys'} \ No newline at end of file diff --git a/testsuite/test_data/valid_file.json b/testsuite/test_data/valid_file.json index 8fa058a..ca96c46 100644 --- a/testsuite/test_data/valid_file.json +++ b/testsuite/test_data/valid_file.json @@ -9,6 +9,9 @@ { "high": 7000, "low": 6500 + }, + { + "value": 8000 } ] } diff --git a/testsuite/test_data/valid_file.yaml b/testsuite/test_data/valid_file.yaml index a37ad10..41eab35 100644 --- a/testsuite/test_data/valid_file.yaml +++ b/testsuite/test_data/valid_file.yaml @@ -3,6 +3,7 @@ independent_variables: - header: {name: SQRT(S), units: GEV} values: - value: 7000 + - value: 8000 dependent_variables: - header: {name: SIG(total), units: FB} qualifiers: diff --git a/testsuite/validation_test.py b/testsuite/validation_test.py index d25bb10..1168730 100644 --- a/testsuite/validation_test.py +++ b/testsuite/validation_test.py @@ -193,9 +193,9 @@ def setUp(self): self.base_dir, 'test_data/invalid_file.json') - self.valid_file_error_percent_yaml = os.path.join( + self.valid_file_uncertainty_percent_yaml = os.path.join( self.base_dir, - 'test_data/valid_data_with_error.yaml' + 'test_data/valid_data_with_percent.yaml' ) self.invalid_syntax_data_file = os.path.join( @@ -212,6 +212,18 @@ def setUp(self): self.base_dir, 'test_data/valid_file_custom.yaml') + self.file_with_zero_uncertainty = os.path.join( + self.base_dir, + 'test_data/file_with_zero_uncertainty.yaml') + + self.file_with_zero_percent = os.path.join( + self.base_dir, + 'test_data/valid_data_with_zero_percent.yaml') + + self.file_with_inconsistent_values = os.path.join( + self.base_dir, + 'test_data/file_with_inconsistent_values.yaml') + def test_no_file_path_supplied(self): try: self.validator.validate(file_path=None) @@ -231,11 +243,11 @@ def test_invalid_yaml_file(self): self.validator.print_errors(self.invalid_file_yaml) - def test_valid_file_with_percent_errors(self): - print('___DATA_VALIDATION: Testing valid yaml percent error ___') - self.assertEqual(self.validator.validate(file_path=self.valid_file_error_percent_yaml), - False) - self.validator.print_errors(self.valid_file_error_percent_yaml) + def test_valid_file_with_percent_uncertainty(self): + print('___DATA_VALIDATION: Testing valid yaml percent uncertainty ___') + is_valid = self.validator.validate(file_path=self.valid_file_uncertainty_percent_yaml) + self.validator.print_errors(self.valid_file_uncertainty_percent_yaml) + self.assertEqual(is_valid, True) def test_valid_json_file(self): print('___DATA_VALIDATION: Testing valid json submission___') @@ -319,6 +331,24 @@ def test_ioerror_yaml_file(self): self.validator.print_errors(self.valid_file_yaml[:-1]) + def test_file_with_zero_uncertainty(self): + print('___DATA_VALIDATION: Testing file with zero uncertainty___') + self.assertEqual(self.validator.validate(file_path=self.file_with_zero_uncertainty), False) + + self.validator.print_errors(self.file_with_zero_uncertainty) + + def test_file_with_zero_percent(self): + print('___DATA_VALIDATION: Testing file with zero percent uncertainty___') + self.assertEqual(self.validator.validate(file_path=self.file_with_zero_percent), False) + + self.validator.print_errors(self.file_with_zero_percent) + + def test_file_with_inconsistent_values(self): + print('___DATA_VALIDATION: Testing file with inconsistent values list___') + self.assertEqual(self.validator.validate(file_path=self.file_with_inconsistent_values), False) + + self.validator.print_errors(self.file_with_inconsistent_values) + if __name__ == '__main__': unittest.main()