Skip to content

Commit

Permalink
Merge d638ff2 into e6c3507
Browse files Browse the repository at this point in the history
  • Loading branch information
alisonrclarke committed Jul 2, 2021
2 parents e6c3507 + d638ff2 commit 2e411f8
Show file tree
Hide file tree
Showing 11 changed files with 190 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '2.7', '3.6', '3.7', '3.8' ]
python-version: [ '3.6', '3.7', '3.8', '3.9' ]

steps:
- uses: actions/checkout@v2
Expand Down
25 changes: 25 additions & 0 deletions hepdata_validator/__init__.py
Expand Up @@ -116,6 +116,31 @@ def clear_messages(self):
"""
self.messages = {}

def add_validation_error(self, file_path, ve):
"""
Formats a validation error into a readable error message and adds it to
the messages dict
"""
location = ''
if ve.path:
for part in ve.path:
if type(part) == int:
location += '[{0}]'.format(part)
elif not location:
location = part
else:
location += '.' + part

message = ve.message
if location:
message += f" in '{location}'"
# Add expected schema section if it it's present and isn't the full schema
if isinstance(ve.schema, dict) and '$schema' not in ve.schema.keys():
message += f" (expected: {ve.schema})"

self.add_validation_message(ValidationMessage(file=file_path,
message=message))

def add_validation_message(self, message):
"""
Adds a message to the messages dict.
Expand Down
21 changes: 10 additions & 11 deletions hepdata_validator/data_file_validator.py
Expand Up @@ -124,10 +124,7 @@ def validate(self, **kwargs):
check_length_values(data)

except ValidationError as ve:
self.add_validation_message(ValidationMessage(
file=file_path,
message=ve.message + ' in ' + str(ve.instance),
))
self.add_validation_error(file_path, ve)

except UnsupportedDataSchemaException as ex:
self.add_validation_message(ValidationMessage(
Expand Down Expand Up @@ -155,14 +152,14 @@ def __unicode__(self):
def check_for_zero_uncertainty(data):
"""
Check that uncertainties are not all zero.
:param data: data table in YAML format
:return: raise ValidationError if uncertainties are all zero
"""
for dependent_variable in data['dependent_variables']:

if 'values' in dependent_variable:
for value in dependent_variable['values']:
for i, value in enumerate(dependent_variable['values']):

if 'errors' in value:
zero_uncertainties = []
Expand All @@ -183,7 +180,9 @@ def check_for_zero_uncertainty(data):
zero_uncertainties.append(False)

if len(zero_uncertainties) > 0 and all(zero_uncertainties):
raise ValidationError('Uncertainties should not all be zero', instance=value)
raise ValidationError("Uncertainties should not all be zero",
path=['dependent_variables', 'values', i, 'errors'],
instance=data['dependent_variables'])


def convert_to_float(error):
Expand All @@ -207,13 +206,13 @@ def check_length_values(data):
"""
Check that the length of the 'values' list is consistent for
each of the independent_variables and dependent_variables.
:param data: data table in YAML format
:return: raise ValidationError if inconsistent
"""
indep_count = [len(indep['values']) for indep in data['independent_variables']]
dep_count = [len(dep['values']) for dep in data['dependent_variables']]
if len(set(indep_count + dep_count)) > 1: # if more than one unique count
raise ValidationError("Inconsistent length of 'values' list: " +
"independent_variables%s, dependent_variables%s" % (str(indep_count), str(dep_count)),
instance=data)
raise ValidationError("Inconsistent length of 'values' list: " +
"independent_variables %s, dependent_variables %s" % (str(indep_count), str(dep_count)),
instance=data)
14 changes: 7 additions & 7 deletions hepdata_validator/submission_file_validator.py
Expand Up @@ -43,7 +43,7 @@ def validate(self, **kwargs):
try:
submission_file_schema = None
additional_file_section_schema = None

with open(self.default_schema_file, 'r') as submission_schema:
submission_file_schema = json.load(submission_schema)

Expand Down Expand Up @@ -74,9 +74,7 @@ def validate(self, **kwargs):
check_cmenergies(data_item)

except ValidationError as ve:
self.add_validation_message(
ValidationMessage(file=file_path,
message=ve.message + ' in ' + str(ve.instance)))
self.add_validation_error(file_path, ve)

if not self.has_errors(file_path):
return_value = True
Expand Down Expand Up @@ -111,7 +109,7 @@ def check_cmenergies(data_item):
:param data_item: YAML document from submission.yaml
:return: raise ValidationError if not numeric
"""
for keyword in data_item['keywords']:
for i, keyword in enumerate(data_item['keywords']):
if keyword['name'] == 'cmenergies':
cmenergies = keyword['values']
for cmenergy in cmenergies:
Expand All @@ -120,5 +118,7 @@ def check_cmenergies(data_item):
except ValueError:
m = re.match(r'^\d+\.?\d?-\d+\.?\d?$', cmenergy)
if not m or len(cmenergies) > 1:
raise ValidationError("Invalid value (in GeV) for cmenergies: %s" % cmenergy,
instance=data_item)
raise ValidationError("Invalid value (in GeV) for cmenergies: '%s'" % cmenergy,
path=['keywords', i, 'name', 'cmenergies'],
instance=data_item['keywords'],
schema={ "type": "number or hyphen-separated range of numbers e.g. 1.7-4.7"})
4 changes: 2 additions & 2 deletions testsuite/test_data/invalid_parser_submission.yaml
Expand Up @@ -8,7 +8,7 @@ keywords: # used for searching, possibly multiple values for each keyword
- { name: cmenergies, value: [7000]}
data_file: data3.yaml
additional_resources:
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
6 changes: 3 additions & 3 deletions testsuite/test_data/invalid_submission.yaml
Expand Up @@ -8,7 +8,7 @@ keywords: # used for searching, possibly multiple values for each keyword
- { name: cmenergies, value: [7000]}
data_file: 12321
additional_resources:
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
- location: "http:github.com/HEPData/hepdata"
description: "Full source code for creating this data"
69 changes: 69 additions & 0 deletions testsuite/test_data/invalid_submission_license.yaml
@@ -0,0 +1,69 @@
---

additional_resources:
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
license:
name: 'GPL 2'
url: 'somewhere over the rainbow...'
description: blah

modifications: # what, by whom and when encoded or changed
- {action: 'Encoded', date: '27 FEB 2014', who: 'Graeme Watt'}
- {action: 'Modified', date: '30 APR 2014', who: 'Graeme Watt'}

comment: | # Information that applies to all data tables.
CERN-LHC. Measurements of the cross section for ZZ production using the 4l and 2l2nu decay channels in proton-proton collisions at a centre-of-mass energy of 7 TeV with 4.6 fb^-1 of data collected in 2011. The final states used are 4 electrons, 4 muons, 2 electrons and 2 muons, 2 electrons and missing transverse momentum, and 2 muons and missing transverse momentum (MET).
The cross section values reported in the tables should be multiplied by a factor of 1.0141 to take into account the updated value of the integrated luminosity for the ATLAS 2011 data taking period. The uncertainty on the global normalisation ("Lumi") remains at 1.8%. See Eur.Phys.J. C73 (2013) 2518 for more details.
The 4l channel fiducial region is defined as:
- 4e, 4mu or 2e2mu
- Ambiguities in pairing are resolved by choosing the combination that results in the smaller value of the sum |mll - mZ| for the two pairs, where mll is the mass of the dilepton system.
- ptLepton > 7 GeV (at least one with ptLepton > 20 (25) GeV for muons (electrons))
- |etaLepton| < 3.16
- At least one lepton pair is required to have invariant mass between 66 and 116 GeV. If the second pair also satisfies this, the event is ZZ, otherwise if the second pair satisfies mll > 20 GeV it is ZZ*.
- min(DeltaR(l,l)) > 0.2.
The 2l2nu channel fiducial region is defined as:
- 2e+MET or 2mu+MET
- ptLepton > 20 GeV
- |etaLepton| < 2.5
- mll must be between 76 and 106 GeV
- -MET*cos(phi_METZ)>75 GeV, where phi_METZ is the angle between the Z and the MET
- |MET - pTZ| / pTZ < 0.4, where pTZ is the transverse momentum of the dilepton system
- No events with a jet for which ptJet > 25 GeV and |etaJet| < 4.5
- No events with a third lepton for which ptLepton > 10 GeV
- min(DeltaR(l,l)) > 0.3
---
name: "Table 1"
location: Page 17 of preprint
description: The measured fiducial cross sections. The first systematic uncertainty is the combined systematic uncertainty excluding luminosity, the second is the luminosity
keywords: # used for searching, possibly multiple values for each keyword
- { name: reactions, values: [P P --> Z0 Z0 X]}
- { name: observables, values: [SIG]}
- { name: cmenergies, values: [7000]}
data_file: data1.yaml
data_license: {description: null, name: null, url: null}
additional_resources:
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
license:
name: 'GPL 2'
url: 'some url'
---
# This is Table 2.
name: "Table 2"
location: Page 20 of preprint
description: The measured total cross sections. The first systematic uncertainty is the combined systematic uncertainty excluding luminosity, the second is the luminosity
keywords: # used for searching, possibly multiple values for each keyword
- { name: reactions, values: [P P --> Z0 Z0 X]}
- { name: observables, values: [SIG]}
- { name: cmenergies, values: [7000]}
data_file: data2.yaml
4 changes: 2 additions & 2 deletions testsuite/test_data/invalid_syntax_submission.yaml
Expand Up @@ -8,7 +8,7 @@ keywords: # used for searching, possibly multiple values for each keyword
- { name: cmenergies, value: [7000]}
data_file:'12321'
additional_resources:
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
10 changes: 5 additions & 5 deletions testsuite/test_data/valid_submission_license.yaml
@@ -1,9 +1,9 @@
---

additional_resources:
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
license:
name: 'GPL 2'
Expand Down Expand Up @@ -52,9 +52,9 @@ data_license:
name: 'GPL 2'
url: 'some url'
additional_resources:
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
- location: "http:github.com/HEPData/hepdata"
- location: "https://github.com/HEPData/hepdata"
description: "Full source code for creating this data"
license:
name: 'GPL 2'
Expand All @@ -68,4 +68,4 @@ keywords: # used for searching, possibly multiple values for each keyword
- { name: reactions, values: [P P --> Z0 Z0 X]}
- { name: observables, values: [SIG]}
- { name: cmenergies, values: [7000]}
data_file: data2.yaml
data_file: data2.yaml

0 comments on commit 2e411f8

Please sign in to comment.