diff --git a/.travis.yml b/.travis.yml index b2e560f..424f562 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,8 +16,8 @@ install: script: - export CURRENT_PATH=`pwd` - - docker run -v $CURRENT_PATH:$CURRENT_PATH hepdata/hepdata-converter /bin/bash -c "cd $CURRENT_PATH && coverage run -m unittest discover hepdata_converter/testsuite 'test_*'" - - docker run -v $CURRENT_PATH:$CURRENT_PATH hepdata/hepdata-converter /bin/bash -c "cd $CURRENT_PATH && python setup.py install sdist && hepdata-converter -v" + - docker run -v $CURRENT_PATH:$CURRENT_PATH hepdata/hepdata-converter:0.1.35 /bin/bash -c "cd $CURRENT_PATH && coverage run -m unittest discover hepdata_converter/testsuite 'test_*'" + - docker run -v $CURRENT_PATH:$CURRENT_PATH hepdata/hepdata-converter:0.1.35 /bin/bash -c "cd $CURRENT_PATH && python setup.py install sdist && hepdata-converter -v" after_success: - coveralls diff --git a/docker b/docker index 84e4e47..189b4e4 160000 --- a/docker +++ b/docker @@ -1 +1 @@ -Subproject commit 84e4e471ee66a99335202d65fa45ca69f8892983 +Subproject commit 189b4e488e6864e1c94e61c4a7131a0d059035d5 diff --git a/hepdata_converter/__init__.py b/hepdata_converter/__init__.py index 3e4b41c..7b4cc61 100644 --- a/hepdata_converter/__init__.py +++ b/hepdata_converter/__init__.py @@ -2,10 +2,10 @@ import argparse import sys import version +from hepdata_validator import LATEST_SCHEMA_VERSION from parsers import Parser from writers import Writer - def convert(input, output=None, options={}): """Converts a supported ``input_format`` (*oldhepdata*, *yaml*) to a supported ``output_format`` (*csv*, *root*, *yaml*, *yoda*). @@ -79,6 +79,7 @@ def _main(arguments=sys.argv): parser.add_argument("--output-format", '-o', action='store', default='yaml', help='format of the output file/s (default: yaml) [choose one option from Writers section below]') parser.add_argument("--version", '-v', action='store_const', const=True, default=False, help='Show hepdata-converter version') parser.add_argument("--hepdata-doi", '-d', action='store', default='', help='Pass HEPData DOI, e.g. "10.17182/hepdata.74247.v1"') + parser.add_argument("--validator-schema-version", '-s', action='store', default=LATEST_SCHEMA_VERSION, help='hepdata_validator schema version (default: %s)' % LATEST_SCHEMA_VERSION) parser.add_argument("input") parser.add_argument("output") diff --git a/hepdata_converter/parsers/yaml_parser.py b/hepdata_converter/parsers/yaml_parser.py index b2a1b50..51f96a0 100644 --- a/hepdata_converter/parsers/yaml_parser.py +++ b/hepdata_converter/parsers/yaml_parser.py @@ -4,6 +4,8 @@ from yaml import CSafeLoader as Loader except ImportError: #pragma: no cover from yaml import SafeLoader as Loader #pragma: no cover + +from hepdata_validator import LATEST_SCHEMA_VERSION from hepdata_validator.submission_file_validator import SubmissionFileValidator from hepdata_validator.data_file_validator import DataFileValidator from hepdata_converter.parsers import Parser, ParsedData, Table @@ -33,6 +35,8 @@ class YAML(Parser): def __init__(self, *args, **kwargs): super(YAML, self).__init__(*args, **kwargs) + self.validator_schema_version = kwargs.get('validator_schema_version', LATEST_SCHEMA_VERSION) + def _pretty_print_errors(self, message_dict): return ' '.join( @@ -64,7 +68,7 @@ def parse(self, data_in, *args, **kwargs): if len(submission_data) == 0: raise RuntimeError("Submission file (%s) is empty" % data_in) - submission_file_validator = SubmissionFileValidator() + submission_file_validator = SubmissionFileValidator(schema_version=self.validator_schema_version) if not submission_file_validator.validate(file_path=data_in, data=submission_data): raise RuntimeError( @@ -76,7 +80,7 @@ def parse(self, data_in, *args, **kwargs): tables = [] # validator for table data - data_file_validator = DataFileValidator() + data_file_validator = DataFileValidator(schema_version=self.validator_schema_version) index = 0 for i in range(0, len(submission_data)): diff --git a/hepdata_converter/testsuite/__init__.py b/hepdata_converter/testsuite/__init__.py index 9552cc5..b26f862 100644 --- a/hepdata_converter/testsuite/__init__.py +++ b/hepdata_converter/testsuite/__init__.py @@ -6,6 +6,11 @@ import time import yaml +# We try to load using the CSafeLoader for speed improvements. +try: + from yaml import CSafeLoader as Loader +except ImportError: #pragma: no cover + from yaml import SafeLoader as Loader #pragma: no cover def _parse_path_arguments(sample_file_name): _sample_file_name = list(sample_file_name) @@ -105,7 +110,7 @@ def assertMultiLineAlmostEqual(self, first, second, msg=None): for i in xrange(len(lines)): self.assertEqual(lines[i].strip(), orig_lines[i].strip()) - def assertDirsEqual(self, first_dir, second_dir, file_content_parser=lambda x: list(yaml.load_all(x)), exclude=[], msg=None): + def assertDirsEqual(self, first_dir, second_dir, file_content_parser=lambda x: list(yaml.load_all(x, Loader=Loader)), exclude=[], msg=None): self.assertEqual(list(os.walk(first_dir))[1:], list(os.walk(second_dir))[1:], msg) dirs = list(os.walk(first_dir)) for file in dirs[0][2]: diff --git a/hepdata_converter/testsuite/test_arraywriter.py b/hepdata_converter/testsuite/test_arraywriter.py index 4201064..7afbc23 100644 --- a/hepdata_converter/testsuite/test_arraywriter.py +++ b/hepdata_converter/testsuite/test_arraywriter.py @@ -9,12 +9,15 @@ class ArrayWriterTestSuite(WriterTestSuite): def test_select_table(self, submission_filepath): csv_content = convert(submission_filepath, options={'input_format': 'yaml', 'output_format': 'csv', + 'validator_schema_version': '0.1.0', 'table': os.path.join(submission_filepath, 'data1.yaml')}) csv_content = convert(submission_filepath, options={'input_format': 'yaml', 'output_format': 'csv', + 'validator_schema_version': '0.1.0', 'table': 'Table 1'}) csv_content = convert(submission_filepath, options={'input_format': 'yaml', 'output_format': 'csv', + 'validator_schema_version': '0.1.0', 'table': 0}) diff --git a/hepdata_converter/testsuite/test_clitools.py b/hepdata_converter/testsuite/test_clitools.py index 8146012..257e58e 100644 --- a/hepdata_converter/testsuite/test_clitools.py +++ b/hepdata_converter/testsuite/test_clitools.py @@ -12,8 +12,11 @@ def test_wrong_call(self): @insert_data_as_str('csv/table_1.csv') def test_convert_yaml2csv(self, submission_file, table_csv): output_path = os.path.join(self.current_tmp, 'output.csv') - code, message = hepdata_converter._main(['--input-format', 'yaml', '--output-format', 'csv', - '--table', 'Table 1', '--pack', submission_file, output_path]) + code, message = hepdata_converter._main(['--input-format', 'yaml', + '--output-format', 'csv', + '--table', 'Table 1', + '--validator-schema-version', '0.1.0', + '--pack', submission_file, output_path]) self.assertEqual(code, 0, message) self.assertTrue(os.path.exists(output_path)) diff --git a/hepdata_converter/testsuite/test_convert.py b/hepdata_converter/testsuite/test_convert.py index 370aeb8..31d98a2 100644 --- a/hepdata_converter/testsuite/test_convert.py +++ b/hepdata_converter/testsuite/test_convert.py @@ -5,6 +5,11 @@ from hepdata_converter.testsuite import insert_path from hepdata_converter.testsuite.test_writer import WriterTestSuite +# We try to load using the CSafeLoader for speed improvements. +try: + from yaml import CSafeLoader as Loader +except ImportError: #pragma: no cover + from yaml import SafeLoader as Loader #pragma: no cover class ConvertTestSuite(WriterTestSuite): """Test suite for Parser factory class @@ -98,7 +103,7 @@ def test_submission(self): hepdata_converter.convert(StringIO.StringIO(self.simple_submission), self.current_tmp, options={'input_format': 'oldhepdata'}) with open(os.path.join(self.current_tmp, 'submission.yaml')) as submission_file: - self.assertEqual(list(yaml.load_all(submission_file)), list(yaml.load_all(self.correct_submit_output))) + self.assertEqual(list(yaml.load_all(submission_file, Loader=Loader)), list(yaml.load_all(self.correct_submit_output, Loader=Loader))) def test_not_implemented_writer(self): """This feature is not implemented yet, but to get test coverage it is tested, @@ -108,6 +113,8 @@ def test_not_implemented_writer(self): @insert_path('yaml_full') def test_same_type_conversion(self, yaml_path): - hepdata_converter.convert(yaml_path, self.current_tmp, options={'input_format': 'yaml', 'output_format': 'yaml'}) + hepdata_converter.convert(yaml_path, self.current_tmp, options={'input_format': 'yaml', + 'output_format': 'yaml', + 'validator_schema_version': '0.1.0'}) # exclude data6.yaml and data7.yaml because they are not listed in submission.yaml - self.assertDirsEqual(yaml_path, self.current_tmp, exclude=['data6.yaml', 'data7.yaml']) \ No newline at end of file + self.assertDirsEqual(yaml_path, self.current_tmp, exclude=['data6.yaml', 'data7.yaml']) diff --git a/hepdata_converter/testsuite/test_csvwriter.py b/hepdata_converter/testsuite/test_csvwriter.py index b7500b9..34403f6 100644 --- a/hepdata_converter/testsuite/test_csvwriter.py +++ b/hepdata_converter/testsuite/test_csvwriter.py @@ -17,7 +17,8 @@ def test_csvwriter_options(self, table_1_content): csv_content = convert(self.submission_filepath, options={'input_format': 'yaml', 'output_format': 'csv', 'table': 'Table 1', - 'pack': True}) + 'pack': True, + 'validator_schema_version': '0.1.0'}) self.assertMultiLineAlmostEqual(table_1_content, csv_content) @@ -26,7 +27,8 @@ def test_2_qualifiers_2_iv_pack(self, table_9_content): csv_content = convert(self.submission_filepath, options={'input_format': 'yaml', 'output_format': 'csv', 'table': 'Table 9', - 'pack': True}) + 'pack': True, + 'validator_schema_version': '0.1.0'}) self.assertMultiLineAlmostEqual(table_9_content, csv_content) @@ -35,7 +37,8 @@ def test_2_qualifiers_2_iv_pack(self, table_9_content): def test_multiple_tables_pack(self, table_1_content, table_9_content): convert(self.submission_filepath, self.current_tmp, options={'input_format': 'yaml', 'output_format': 'csv', - 'pack': True}) + 'pack': True, + 'validator_schema_version': '0.1.0'}) with open(os.path.join(self.current_tmp, 'Table1.csv'), 'r') as f: self.assertMultiLineAlmostEqual(table_1_content, f.read()) @@ -48,6 +51,7 @@ def test_2_qualifiers_2_iv_unpack(self, table_9_content): csv_content = convert(self.submission_filepath, options={'input_format': 'yaml', 'output_format': 'csv', 'table': 'Table 9', + 'validator_schema_version': '0.1.0', 'separator': ';', 'pack': False}) self.assertMultiLineAlmostEqual(table_9_content, csv_content) @@ -55,8 +59,11 @@ def test_2_qualifiers_2_iv_unpack(self, table_9_content): @insert_data_as_str('csv/table_9_unpacked.csv') def test_cli(self, table_9_content): csv_filepath = os.path.join(self.current_tmp, 'tab.csv') - hepdata_converter._main(['--output-format', 'csv', '--table', 'Table 9', '--separator', ';', self.submission_filepath, - csv_filepath]) + hepdata_converter._main(['--output-format', 'csv', + '--table', 'Table 9', + '--validator-schema-version', '0.1.0', + '--separator', ';', + self.submission_filepath, csv_filepath]) with open(csv_filepath, 'r') as csv_file: self.assertEqual(table_9_content, csv_file.read()) @@ -65,7 +72,10 @@ def test_cli(self, table_9_content): @insert_data_as_str('csv/table_9_unpacked_comma.csv') def test_no_dir_output(self, table_1_content, table_9_content): csv_filepath = os.path.join(self.current_tmp, 'csv_dir') - hepdata_converter._main(['--output-format', 'csv', '--separator', ',', self.submission_filepath, + hepdata_converter._main(['--output-format', 'csv', + '--validator-schema-version', '0.1.0', + '--separator', ',', + self.submission_filepath, csv_filepath]) self.assertTrue(os.path.exists(csv_filepath)) diff --git a/hepdata_converter/testsuite/test_oldhepdata.py b/hepdata_converter/testsuite/test_oldhepdata.py index cbd5dda..66dd183 100644 --- a/hepdata_converter/testsuite/test_oldhepdata.py +++ b/hepdata_converter/testsuite/test_oldhepdata.py @@ -15,7 +15,7 @@ class OldHEPDataTestSuite(WriterTestSuite): def test_parse_submission(self, oldhepdata_file, yaml_path): oldhepdata_p = OldHEPData() oldhepdata_parsed_data = oldhepdata_p.parse(oldhepdata_file) - yaml_p = yaml_parser.YAML() + yaml_p = yaml_parser.YAML(validator_schema_version='0.1.0') yaml_parsed_data = yaml_p.parse(yaml_path) self.assertEqual(yaml_parsed_data, oldhepdata_parsed_data) @@ -36,7 +36,7 @@ def test_parse_large_old_submission(self, oldhepdata_file, yaml_path): oldhepdata_p = OldHEPData() oldhepdata_parsed_data = oldhepdata_p.parse(oldhepdata_file) - yaml_p = yaml_parser.YAML() + yaml_p = yaml_parser.YAML(validator_schema_version='0.1.0') yaml_parsed_data = yaml_p.parse(yaml_path) assert(len(yaml_parsed_data.tables) is len(oldhepdata_parsed_data.tables)) diff --git a/hepdata_converter/testsuite/test_rootwriter.py b/hepdata_converter/testsuite/test_rootwriter.py index ecee9df..cd27a45 100644 --- a/hepdata_converter/testsuite/test_rootwriter.py +++ b/hepdata_converter/testsuite/test_rootwriter.py @@ -43,7 +43,8 @@ class ROOTWriterTestSuite(WriterTestSuite): def test_simple_parse(self, yaml_full_path, full_root_path): output_file_path = os.path.join(self.current_tmp, 'datafile.root') hepdata_converter.convert(yaml_full_path, output_file_path, - options={'output_format': 'root'}) + options={'output_format': 'root', + 'validator_schema_version': '0.1.0'}) self.assertNotEqual(os.stat(output_file_path).st_size, 0, 'output root file is empty') @@ -55,7 +56,8 @@ def test_simple_parse(self, yaml_full_path, full_root_path): with open(output_file_path, 'w') as output: hepdata_converter.convert(yaml_full_path, output, - options={'output_format': 'root'}) + options={'output_format': 'root', + 'validator_schema_version': '0.1.0'}) self.assertNotEqual(os.stat(output_file_path).st_size, 0, 'output root file is empty') @@ -80,7 +82,9 @@ def test_simple_parse(self, yaml_full_path, full_root_path): def test_th1_parse(self, yaml_full_path, full_root_path): output_file_path = os.path.join(self.current_tmp, 'datafile.root') hepdata_converter.convert(yaml_full_path, output_file_path, - options={'output_format': 'root', 'table': 'data2.yaml'}) + options={'output_format': 'root', + 'table': 'data2.yaml', + 'validator_schema_version': '0.1.0'}) pass @insert_paths('yaml/ins1283183', 'yaml/ins1397637', 'yaml/ins699647', 'yaml/ins1413748') diff --git a/hepdata_converter/testsuite/test_yamlparser.py b/hepdata_converter/testsuite/test_yamlparser.py index c397ba7..c494719 100644 --- a/hepdata_converter/testsuite/test_yamlparser.py +++ b/hepdata_converter/testsuite/test_yamlparser.py @@ -12,6 +12,7 @@ class YAMLWriterTestSuite(WriterTestSuite): def test_no_qal_parse(self, yaml_path, table_5_noqual): data = hepdata_converter.convert(yaml_path, options={'input_format': 'yaml', 'output_format': 'csv', - 'single_file': True}) + 'single_file': True, + 'validator_schema_version': '0.1.0'}) self.assertEqual(data, table_5_noqual) diff --git a/hepdata_converter/testsuite/test_yodawriter.py b/hepdata_converter/testsuite/test_yodawriter.py index 1b8ae2d..5987dbf 100644 --- a/hepdata_converter/testsuite/test_yodawriter.py +++ b/hepdata_converter/testsuite/test_yodawriter.py @@ -13,7 +13,8 @@ class YODAWriterTestSuite(WriterTestSuite): def test_simple_parse(self, yaml_simple_path, yoda_template): output_file_path = os.path.join(self.current_tmp, 'datafile.yoda') hepdata_converter.convert(yaml_simple_path, output_file_path, - options={'output_format': 'yoda'}) + options={'output_format': 'yoda', + 'validator_schema_version': '0.1.0'}) self.assertNotEqual(os.stat(output_file_path).st_size, 0, 'output yoda file is empty') with open(output_file_path, 'r') as f: diff --git a/hepdata_converter/writers/yaml_writer.py b/hepdata_converter/writers/yaml_writer.py index 2b3befc..b15cfc6 100644 --- a/hepdata_converter/writers/yaml_writer.py +++ b/hepdata_converter/writers/yaml_writer.py @@ -54,14 +54,14 @@ def write(self, data_in, data_out, *args, **kwargs): if not self.single_file: self.create_dir(data_out) with open(os.path.join(data_out, 'submission.yaml'), 'w') as submission_file: - yaml.dump_all([data] + [table.metadata for table in tables], submission_file, Dumper=Dumper) + yaml.dump_all([data] + [table.metadata for table in tables], submission_file, Dumper=Dumper, default_flow_style=None) for table in tables: with open(os.path.join(data_out, table.data_file), 'w') as table_file: - yaml.dump(table.data, table_file, Dumper=Dumper) + yaml.dump(table.data, table_file, Dumper=Dumper, default_flow_style=None) else: if isinstance(data_out, (str, unicode)): with open(data_out, 'w') as submission_file: - yaml.dump_all([data] + [table.all_data for table in tables], submission_file, Dumper=Dumper) + yaml.dump_all([data] + [table.all_data for table in tables], submission_file, Dumper=Dumper, default_flow_style=None) else: # expect filelike object - yaml.dump_all([data] + [table.all_data for table in tables], data_out, Dumper=Dumper) \ No newline at end of file + yaml.dump_all([data] + [table.all_data for table in tables], data_out, Dumper=Dumper, default_flow_style=None) diff --git a/hepdata_converter/writers/yoda_writer.py b/hepdata_converter/writers/yoda_writer.py index 52012e5..f281aa0 100644 --- a/hepdata_converter/writers/yoda_writer.py +++ b/hepdata_converter/writers/yoda_writer.py @@ -118,7 +118,7 @@ def _write_table(self, data_out, table): + 'd' + table_num.zfill(2) + '-x01-y' + str(idep + 1).zfill(2) graph.setAnnotation('IsRef', '1') yoda.core.writeYODA(graph, data_out) - data_out.write('\n\n') + data_out.write('\n') def write(self, data_in, data_out, *args, **kwargs): """