From a7b1d08e3dd2dfe1d3e576d633eb3269c5ee0918 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 17 Mar 2020 11:29:49 -0400 Subject: [PATCH 1/5] Make schema-folder a dynamic arg. --- hepdata_validator/__init__.py | 5 +++-- hepdata_validator/data_file_validator.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hepdata_validator/__init__.py b/hepdata_validator/__init__.py index b221100..794bde8 100644 --- a/hepdata_validator/__init__.py +++ b/hepdata_validator/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of HEPData. -# Copyright (C) 2016 CERN. +# Copyright (C) 2020 CERN. # # HEPData is free software; you can redistribute it # and/or modify it under the terms of the GNU General Public License as @@ -47,13 +47,14 @@ def __init__(self, *args, **kwargs): self.messages = {} self.default_schema_file = '' self.schemas = kwargs.get('schemas', {}) + self.schema_folder = kwargs.get('schema_folder', 'schemas') self.schema_version = kwargs.get('schema_version', LATEST_SCHEMA_VERSION) if self.schema_version not in VALID_SCHEMA_VERSIONS: raise ValueError('Invalid schema version ' + self.schema_version) def _get_schema_filepath(self, schema_filename): full_filepath = os.path.join(self.base_path, - 'schemas', + self.schema_folder, self.schema_version, schema_filename) diff --git a/hepdata_validator/data_file_validator.py b/hepdata_validator/data_file_validator.py index ccb1024..a9b53c9 100644 --- a/hepdata_validator/data_file_validator.py +++ b/hepdata_validator/data_file_validator.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of HEPData. -# Copyright (C) 2016 CERN. +# Copyright (C) 2020 CERN. 
# # HEPData is free software; you can redistribute it # and/or modify it under the terms of the GNU General Public License as @@ -66,7 +66,7 @@ def load_custom_schema(self, type, schema_file_path=None): _schema_file = schema_file_path else: _schema_file = os.path.join(self.base_path, - 'schemas', + self.schema_folder, self.schema_version, "{0}_schema.json".format(type)) From 856ce73a8e7d42bdd869e0c8f6c7c3242cfc28ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 17 Mar 2020 12:07:49 -0400 Subject: [PATCH 2/5] Implement HTTPSchemaDownloader --- .gitignore | 5 +- hepdata_validator/schema_downloader.py | 124 +++++++++++++++++++++++++ setup.py | 3 +- 3 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 hepdata_validator/schema_downloader.py diff --git a/.gitignore b/.gitignore index 68f009a..e7767fc 100644 --- a/.gitignore +++ b/.gitignore @@ -56,4 +56,7 @@ docs/_build/ target/ # PyCharm -.idea/ \ No newline at end of file +.idea/ + +# Downloaded schemas +hepdata_validator/schemas_remote/ diff --git a/hepdata_validator/schema_downloader.py b/hepdata_validator/schema_downloader.py new file mode 100644 index 0000000..ca45c83 --- /dev/null +++ b/hepdata_validator/schema_downloader.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +# +# This file is part of HEPData. +# Copyright (C) 2020 CERN. +# +# HEPData is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# HEPData is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with HEPData; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. + +import os +import requests +from abc import ABCMeta +from abc import abstractmethod + + +class SchemaDownloaderInterface(object): + """ + Interface for the schema downloader objects. + Used to validate schemas available across the internet. + """ + + __metaclass__ = ABCMeta + + @abstractmethod + def get_schema(self, schema_name): + """ + Retrieves the specified schema from a remote URL. + :param schema_name: str. + :return: str. + """ + + raise NotImplementedError() + + @abstractmethod + def save_locally(self, schema_name, schema_spec, overwrite): + """ + Saves the remote schema in the local file system + :param schema_name: str. + :param schema_spec: str. + :param overwrite: bool. + :return: None. + """ + + raise NotImplementedError() + + +class HTTPSchemaDownloader(SchemaDownloaderInterface): + """ + Object to download schemas using HTTP / HTTPS + Used to validate schemas available across the internet. + """ + + def __init__(self, endpoint, company, version): + """ + Initializes the local folder where schemas will be stored. + :param endpoint: str. + :param company: str. + :param version: str. + """ + + self.endpoint = endpoint + self.company = company + self.version = version + + self.saved_schema_folder = "schemas_remote" + self.saved_schema_path = self._build_local_path(company, version) + + def _build_local_path(self, company, version): + """ + Builds the remote schemas complete URL, up to the schema names + :param company: str + :param version: str. + :return: str. 
+ """ + + base_path = os.path.dirname(__file__) + return os.path.join(base_path, self.saved_schema_folder, company, version) + + def get_schema(self, schema_name): + """ + Downloads the specified schema from a remote URL. + :param schema_name: str. + :return: str. + """ + + schema_url = self.endpoint + "/" + schema_name + schema_resp = requests.get(schema_url) + schema_resp.raise_for_status() + + return schema_resp.text + + def save_locally(self, schema_name, schema_spec, overwrite=False): + """ + Saves the remote schema in the local file system + :param schema_name: str. + :param schema_spec: str. + :param overwrite: bool. + :return: None. + """ + + file_path = os.path.join(self.saved_schema_path, schema_name) + + # Skip download if the file exist + if os.path.isfile(file_path) and not overwrite: + return + + os.makedirs(os.path.dirname(file_path), exist_ok=True) + with open(file_path, 'w') as f: + f.write(schema_spec) diff --git a/setup.py b/setup.py index 2c87262..1cd9a6c 100644 --- a/setup.py +++ b/setup.py @@ -83,7 +83,8 @@ def run_tests(self): extras_require=extras_require, install_requires=[ "pyyaml", - "jsonschema" + "jsonschema", + "requests", ], test_suite='hepdata_validator.testsuite', tests_require=test_requirements, From 689d84ba8e282f132d670d7e813fb74cc9919ce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 17 Mar 2020 14:45:30 -0400 Subject: [PATCH 3/5] Define HTTPSchemaDownloader tests --- testsuite/test_schema_downloader.py | 121 ++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 testsuite/test_schema_downloader.py diff --git a/testsuite/test_schema_downloader.py b/testsuite/test_schema_downloader.py new file mode 100644 index 0000000..2a45dfa --- /dev/null +++ b/testsuite/test_schema_downloader.py @@ -0,0 +1,121 @@ +import os +import pytest +from hepdata_validator.schema_downloader import HTTPSchemaDownloader +from requests.exceptions import HTTPError +from unittest.mock import patch + + 
+#################################################### +# Tests fixtures # +#################################################### + + +@pytest.fixture(scope="module") +def http_downloader(): + """ + Generates a valid HTTPSchemaDownloader using example names + """ + + return HTTPSchemaDownloader( + endpoint="https://testing.com/schemas/1.0.0", + company="testing.com", + version="1.0.0", + ) + + +#################################################### +# Tests mocks # +#################################################### + + +class MockedResponse(object): + + def __init__(self, content, http_code): + self.content = content + self.http_code = http_code + + def raise_for_status(self): + if self.http_code != 200: + raise HTTPError + + @property + def text(self): + return self.content + + +def get_patched_valid_response(url): + return MockedResponse('{"field_1": "value_1", "field_2": "value_2"}', 200) + + +def get_patched_invalid_response(url): + return MockedResponse("Not found", 404) + + +#################################################### +# HTTPSchemaDownloader tests # +#################################################### + + +@patch('requests.get', new=get_patched_valid_response) +def test_http_downloader_get_schema(http_downloader): + """ + Tests the HTTPSchemaDownloader with a real schema name + :param http_downloader: HTTPSchemaDownloader + """ + + file_name = "real_schema.json" + + schema_spec = http_downloader.get_schema(file_name) + assert len(schema_spec) > 0 + + +@patch('requests.get', new=get_patched_invalid_response) +def test_http_downloader_get_missing_schema(http_downloader): + """ + Tests the HTTPSchemaDownloader with a missing schema name + :param http_downloader: HTTPSchemaDownloader + """ + + file_name = "missing_schema.json" + + with pytest.raises(HTTPError): + http_downloader.get_schema(file_name) + + +def test_http_downloader_save_schema(http_downloader): + """ + Tests the HTTPSchemaDownloader with an invalid initialization + :param http_downloader: 
HTTPSchemaDownloader + """ + + schema_name = "dummy.json" + schema_spec = '{"key_1": "value_1", "key_2": "value_2"}' + + http_downloader.save_locally(schema_name, schema_spec, overwrite=True) + + expected_folder = http_downloader.saved_schema_path + expected_path = os.path.join(expected_folder, schema_name) + + assert os.path.isfile(expected_path) + + +def test_http_downloader_save_existing_schema(http_downloader): + """ + Tests the HTTPSchemaDownloader with an invalid initialization + :param http_downloader: HTTPSchemaDownloader + """ + + schema_name = "dummy.json" + schema_spec_1 = '{"key_1": "value_1", "key_2": "value_2"}' + schema_spec_2 = '{"key_1": "new_value_1", "key_2": "new_value_2"}' + + http_downloader.save_locally(schema_name, schema_spec_1, overwrite=True) + http_downloader.save_locally(schema_name, schema_spec_2, overwrite=False) + + expected_folder = http_downloader.saved_schema_path + expected_path = os.path.join(expected_folder, schema_name) + + with open(expected_path, 'r') as f: + file_content = f.read() + + assert file_content == schema_spec_1 From fe01107ea79f7501e2c43659e80a9aef809aae88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 17 Mar 2020 15:47:18 -0400 Subject: [PATCH 4/5] Update documentation --- README.rst | 51 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 735c411..c19653a 100644 --- a/README.rst +++ b/README.rst @@ -61,7 +61,29 @@ Via GitHub (for developers): Usage ----- -To validate files, you need to instantiate a validator (I love OO). +To validate against remote schemas, instantiate a ``HTTPSchemaDownloader`` object. + +This object retrieves schemas from a remote location, and optionally saves them in the local file system, +following the structure: ``schemas_remote/<company>/<version>/<schema_name>`` + +.. 
code:: python + + from hepdata_validator.schema_downloader import HTTPSchemaDownloader + + downloader = HTTPSchemaDownloader( + endpoint="https://scikit-hep.org/pyhf/schemas/1.0.0", + company="scikit-hep.org", + version="1.0.0", + ) + + schema_name = "defs.json" + schema_spec = downloader.get_schema(schema_name) + + # The downloader stores the remote schema in the local path + downloader.save_locally(schema_name, schema_spec) + + +To validate submissions, instantiate a ``SubmissionFileValidator`` object: .. code:: python @@ -80,7 +102,11 @@ To validate files, you need to instantiate a validator (I love OO). submission_file_validator.print_errors(submission_file_path) -Data file validation is exactly the same. +To validate data files, you need to instantiate a ``DataFileValidator`` object. + +In this case, the ``DataFileValidator`` can take a ``schema_folder`` argument to specify +the location of the schemas it is going to validate against (by default ``schemas``). +This is useful when validating against schemas stored inside ``schemas_remote/``. .. code:: python @@ -125,13 +151,22 @@ HEPData submission. Schemas ------- -There are currently 2 versions of the JSON schemas, `0.1.0 +When considering **native HEP JSON schemas**, there are currently 2 versions: `0.1.0 `_ and `1.0.0 -`_. In most cases you should use -**1.0.0** (the default). If you need to use a different version, you can pass a keyword argument ``schema_version`` -when initialising the validator: +`_. +In most cases you should use **1.0.0** (the default). If you need to use a different version, +you can pass a keyword argument ``schema_version`` when initialising the validator: + +.. 
code:: python + + sub_validator = SubmissionFileValidator(schema_version='0.1.0') + data_validator = DataFileValidator(schema_version='0.1.0') + +When using **remotely defined schemas**, versions depend on the organization providing those schemas, +and it is their responsibility to offer a way of keeping track of different schema versions. +An example may be: .. code:: python - submission_file_validator = SubmissionFileValidator(schema_version='0.1.0') - data_file_validator = DataFileValidator(schema_version='0.1.0') + sub_validator = SubmissionFileValidator(schema_folder='schemas_remote/scikit-hep.org', schema_version='1.0.0') + data_validator = DataFileValidator(schema_folder='schemas_remote/scikit-hep.org', schema_version='1.0.0') \ No newline at end of file From bb2acc7053a9acb3efc090916c3e20b9b5bed718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sinclert=20P=C3=A9rez?= Date: Tue, 17 Mar 2020 16:37:36 -0400 Subject: [PATCH 5/5] Python2 compatibility changes --- hepdata_validator/schema_downloader.py | 9 ++++++++- setup.py | 1 + testsuite/test_schema_downloader.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hepdata_validator/schema_downloader.py b/hepdata_validator/schema_downloader.py index ca45c83..a777e29 100644 --- a/hepdata_validator/schema_downloader.py +++ b/hepdata_validator/schema_downloader.py @@ -114,11 +114,18 @@ def save_locally(self, schema_name, schema_spec, overwrite=False): """ file_path = os.path.join(self.saved_schema_path, schema_name) + file_folder = os.path.dirname(file_path) # Skip download if the file exist if os.path.isfile(file_path) and not overwrite: return - os.makedirs(os.path.dirname(file_path), exist_ok=True) + # This is compatible both with Python2 and Python3 + try: + os.makedirs(file_folder) + except OSError: + if not os.path.isdir(file_folder): + raise + with open(file_path, 'w') as f: f.write(schema_spec) diff --git a/setup.py b/setup.py index 1cd9a6c..18e1a04 100644 --- a/setup.py +++ b/setup.py @@ 
-13,6 +13,7 @@ 'pytest-cov>=1.8.0', 'pytest-pep8>=1.0.6', 'coverage>=3.7.1', + 'mock>=2.0.0', ] extras_require = { diff --git a/testsuite/test_schema_downloader.py b/testsuite/test_schema_downloader.py index 2a45dfa..e161467 100644 --- a/testsuite/test_schema_downloader.py +++ b/testsuite/test_schema_downloader.py @@ -2,7 +2,7 @@ import pytest from hepdata_validator.schema_downloader import HTTPSchemaDownloader from requests.exceptions import HTTPError -from unittest.mock import patch +from mock import patch ####################################################