From e71d410f99e8584a9ebd779973e603ba2ca47118 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Wed, 21 Mar 2018 15:42:27 -0400 Subject: [PATCH 01/18] First pass at a simple JSON Schema --- schema/build_dev_schema.py | 17 +++++++++++++++++ schema/dev/base.json | 20 ++++++++++++++++++++ schema/dev/molecule.json | 20 ++++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100644 schema/build_dev_schema.py create mode 100644 schema/dev/base.json create mode 100644 schema/dev/molecule.json diff --git a/schema/build_dev_schema.py b/schema/build_dev_schema.py new file mode 100644 index 0000000..d79d58d --- /dev/null +++ b/schema/build_dev_schema.py @@ -0,0 +1,17 @@ +""" +A simple program to construct the input and ouput Quantum Chemistry Schema's +from the development branch +""" +import json + +with open("dev/base.json") as infile: + schema = json.load(infile) + +for part in ["molecule"]: + with open("dev/" + part + ".json") as infile: + schema["definitions"][part] = json.load(infile) + +with open("qc_schema.schema", "w") as outfile: + json.dump(schema, outfile) + +print(json.dumps(schema, indent=2)) diff --git a/schema/dev/base.json b/schema/dev/base.json new file mode 100644 index 0000000..1c21aad --- /dev/null +++ b/schema/dev/base.json @@ -0,0 +1,20 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "description": "The MolSSI Quantum Chemistry Schema", + "type": "object", + "version": "0.1.dev", + "properties": { + "molecule": { + "type": "object", + "$ref": "#/definitions/molecule" + }, + "driver": { + "enum": ["energy", "gradient", "hessian", "property"] + }, + "keywords": { + "type": "object" + } + }, + "required": ["molecule", "driver", "keywords"], + "definitions": {} +} diff --git a/schema/dev/molecule.json b/schema/dev/molecule.json new file mode 100644 index 0000000..398ea1d --- /dev/null +++ b/schema/dev/molecule.json @@ -0,0 +1,20 @@ +{ + "properties": { + "symbols": { + "description": "The atom symbol for each atom in the 
molecule.", + "type": "array", + "items": { + "type": "string" + } + }, + "geometry": { + "description": "The 3N XYZ coordinates of the atoms involved.", + "type": "array", + "items": { + "type": "number" + } + } + }, + "required": ["symbols", "geometry"], + "description": "The physical cartesian representation of the molecular system" +} From 72e4e80f3fd9930df15ded13b47cebcdcc8467f2 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Wed, 21 Mar 2018 16:20:08 -0400 Subject: [PATCH 02/18] Toying around with testing systems --- schema/build_dev_schema.py | 35 ++++++++++--- schema/dev/base.json | 11 ++-- schema/dev/molecule.json | 5 ++ tests/simple/helium_dimer_hf.json | 55 ++++++++++++++++++++ tests/test_helpers.py | 83 +++++++++++++++++++++++++++++++ tests/test_schema.py | 21 ++++++++ 6 files changed, 199 insertions(+), 11 deletions(-) create mode 100644 tests/simple/helium_dimer_hf.json create mode 100644 tests/test_helpers.py create mode 100644 tests/test_schema.py diff --git a/schema/build_dev_schema.py b/schema/build_dev_schema.py index d79d58d..a130bf6 100644 --- a/schema/build_dev_schema.py +++ b/schema/build_dev_schema.py @@ -2,16 +2,39 @@ A simple program to construct the input and ouput Quantum Chemistry Schema's from the development branch """ +import os import json +import glob +def read_json(filename): + with open(filename, "r") as infile: + data = json.load(infile) + return data + +# Load in the base JSON with open("dev/base.json") as infile: schema = json.load(infile) -for part in ["molecule"]: - with open("dev/" + part + ".json") as infile: - schema["definitions"][part] = json.load(infile) +# Add the definitions together +for def_file in glob.glob("dev/definitions/*"): + def_json = read_json(def_file) + name = os.path.basename(def_file).replace(".json", "") + + schema["definitions"][name] = def_json + +# Load in larger pieces like molecule and variables spec +for base_spec in ["molecule"]: + def_json = read_json("dev/" + base_spec + ".json") + 
schema["properties"][base_spec] = def_json + +# Write out the input and output specs +input_required = ["molecule", "driver", "keywords"] +output_required = input_required + ["provenance", "properties", "error", "success", "raw_output"] -with open("qc_schema.schema", "w") as outfile: - json.dump(schema, outfile) +for prefix, required in zip(["input", "output"], [input_required, output_required]): + schema["required"] = required + with open(prefix + "_qc_schema.schema", "w") as outfile: + json.dump(schema, outfile) -print(json.dumps(schema, indent=2)) +# Print the output spec for prosperity +# print(json.dumps(schema, indent=2)) diff --git a/schema/dev/base.json b/schema/dev/base.json index 1c21aad..0ddaa96 100644 --- a/schema/dev/base.json +++ b/schema/dev/base.json @@ -4,17 +4,18 @@ "type": "object", "version": "0.1.dev", "properties": { - "molecule": { - "type": "object", - "$ref": "#/definitions/molecule" - }, + "molecule": null, "driver": { "enum": ["energy", "gradient", "hessian", "property"] }, "keywords": { "type": "object" + }, + "provenance": { + "type": "object", + "$ref": "#/definitions/provenance" } }, - "required": ["molecule", "driver", "keywords"], + "required": null, "definitions": {} } diff --git a/schema/dev/molecule.json b/schema/dev/molecule.json index 398ea1d..936a46c 100644 --- a/schema/dev/molecule.json +++ b/schema/dev/molecule.json @@ -13,7 +13,12 @@ "items": { "type": "number" } + }, + "provenance": { + "type": "object", + "$ref": "#/definitions/provenance" } + }, "required": ["symbols", "geometry"], "description": "The physical cartesian representation of the molecular system" diff --git a/tests/simple/helium_dimer_hf.json b/tests/simple/helium_dimer_hf.json new file mode 100644 index 0000000..f48b98a --- /dev/null +++ b/tests/simple/helium_dimer_hf.json @@ -0,0 +1,55 @@ +{ + "provenance": { + "creator": "My QM Program", + "version": "1.1rc1", + "database": "https://pqr.pitt.edu/mol/HEFNNWSXXWATRW-JTQLQIEISA-N" + }, + "molecule": { + 
"geometry": [ + 0, + 0, + 0, + 0, + 0, + 1 + ], + "symbols": [ + "He", + "He" + ] + }, + "driver": "energy", + "keywords": { + "method": "SCF", + "basis": "sto-3g" + }, + "properties": { + "SCF N ITERS": 2.0, + "dipole_moment": { + "x": 0.0, + "y": 0.0, + "z": 0.0, + "units": "debye" + }, + "SCF TOTAL ENERGY": { + "val": -5.433191881443323, + "units": "hartree" + }, + "SCF TWO-ELECTRON ENERGY": { + "val": 4.124089347186247, + "units": "hartree" + }, + "NUCLEAR REPULSION ENERGY": { + "val": 2.11670883436, + "units": "hartree" + }, + "ONE-ELECTRON ENERGY": { + "val": -11.67399006298957, + "units": "hartree" + } + }, + "error": "", + "success": true, + "raw_output": "Output storing was not requested." +} + diff --git a/tests/test_helpers.py b/tests/test_helpers.py new file mode 100644 index 0000000..92922a7 --- /dev/null +++ b/tests/test_helpers.py @@ -0,0 +1,83 @@ +""" +Contains helper scripts to assist in testing the schema +""" + +import pytest +import os +import glob +import copy +import json +import subprocess + + +def _read_json_file(*filename): + filename = os.path.join(*filename) + with open(filename, "r") as infile: + data = json.load(infile) + return data + +# Find a few required relative paths +_test_path = os.path.dirname(os.path.abspath(__file__)) +_base_path = os.path.dirname(_test_path) +_schema_path = os.path.join(_base_path, "schema") + +# Generate the schema quickly (super hacky change later) +os.chdir(_schema_path) +subprocess.call(["python", "build_dev_schema.py"]) +os.chdir(_test_path) + +# Dictionary of known schema versions +_input_schemas = {} +_output_schemas = {} +_schema_versions = ["dev"] + +# Pull in dev schema +_input_schemas["dev"] = _read_json_file(_schema_path, "input_qc_schema.schema") +_output_schemas["dev"] = _read_json_file(_schema_path, "output_qc_schema.schema") + +# Pull in previous versions +for version_path in glob.glob(os.path.join(_schema_path, "v*")): + version = os.path.basename(version_path) + + _input_schemas[version] = 
_read_json_file(version_path, "input_qc_schema.schema") + _output_schemas[version] = _read_json_file(version_path, "output_qc_schema.schema") + _schema_versions.append(version) + +def list_versions(): + """ + Lists all schema versions detected by the tests. + """ + return copy.deepcopy(_schema_versions) + +def test_folder(): + """ + Returns the testing folder. + """ + return _test_path + +def list_tests(folder): + """ + Lists all tests in a given folder. + """ + files = glob.glob(os.path.join(_test_path, folder, "*")) + files = [x.replace(_test_path, "") for x in files] + + return files + +def get_test(name): + return _read_json_file(_test_path + name) + + +def find_schema(version, schema_type): + """ + Returns the appropriate schema version. + """ + + if schema_type == "input": + return _input_schemas[version] + elif schema_type == "output": + return _output_schemas[version] + else: + raise KeyError("schema_type can only be either 'input' or 'output', found %s" % schema_type) + + diff --git a/tests/test_schema.py b/tests/test_schema.py new file mode 100644 index 0000000..f16d552 --- /dev/null +++ b/tests/test_schema.py @@ -0,0 +1,21 @@ +""" +Tests the JSON schema +""" +import jsonschema +import pytest +import os + +import test_helpers + +# Loop over all tests that should pass the tests +@pytest.mark.parametrize("version", test_helpers.list_versions()) +@pytest.mark.parametrize("testfile", test_helpers.list_tests("simple")) +def test_schema_validation(version, testfile): + + schema = test_helpers.find_schema(version, "output") + example = test_helpers.get_test(testfile) + jsonschema.validate(example, schema) + + + + From 6843ee5e2ef9681c92f32677fff51ae670528d94 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Wed, 21 Mar 2018 16:44:28 -0400 Subject: [PATCH 03/18] Adds various built in definitions --- .gitignore | 7 +++++++ schema/dev/definitions/error.json | 19 +++++++++++++++++++ schema/dev/definitions/provenance.json | 19 +++++++++++++++++++ 3 files changed, 45 
insertions(+) create mode 100644 schema/dev/definitions/error.json create mode 100644 schema/dev/definitions/provenance.json diff --git a/.gitignore b/.gitignore index 7bbc71c..1a5e080 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,10 @@ ENV/ # mypy .mypy_cache/ + +# Vim leftover +*.swp + +# Dev schema +schema/input_qc_schema.schema +schema/output_qc_schema.schema diff --git a/schema/dev/definitions/error.json b/schema/dev/definitions/error.json new file mode 100644 index 0000000..381f419 --- /dev/null +++ b/schema/dev/definitions/error.json @@ -0,0 +1,19 @@ +{ + "properties": { + "creator": { + "description": "The name of the person or program who created this object.", + "type": "string" + }, + "version": { + "description": "The version of the program which created this object, blank otherwise.", + "type": "string" + }, + "routine": { + "description": "The routine of the program which created this object, blank otherwise.", + "type": "string" + } + }, + "required": ["creator"], + "description": "The type of error message raised.", + "additionalProperties": true +} diff --git a/schema/dev/definitions/provenance.json b/schema/dev/definitions/provenance.json new file mode 100644 index 0000000..dd241e7 --- /dev/null +++ b/schema/dev/definitions/provenance.json @@ -0,0 +1,19 @@ +{ + "properties": { + "creator": { + "description": "The name of the person or program who created this object.", + "type": "string" + }, + "version": { + "description": "The version of the program which created this object, blank otherwise.", + "type": "string" + }, + "routine": { + "description": "The routine of the program which created this object, blank otherwise.", + "type": "string" + } + }, + "required": ["creator"], + "description": "A short provenance of the object.", + "additionalProperties": true +} From ba958ef3368e8afa04d6cf0207e478c2a5728d2a Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Wed, 21 Mar 2018 16:47:55 -0400 Subject: [PATCH 04/18] Adds a simple travis YAML 
--- .travis.yml | 26 ++++++++++++++++++++++++++ Topology/README.md | 12 ++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..91cccda --- /dev/null +++ b/.travis.yml @@ -0,0 +1,26 @@ +# After changing this file, check it on: +# http://lint.travis-ci.org/ +language: python + +# Run jobs on container-based infrastructure, can be overridden per job +sudo: false + +matrix: + include: + - python: 2.7 + - python: 3.5 + +before_install: + - uname -a + - free -m + - df -h + - ulimit -a + - python -V + - pip install --upgrade pip setuptools + - pip install pytest jsonschema + +script: + - py.test -v + +notifications: + email: false diff --git a/Topology/README.md b/Topology/README.md index 1f50dec..74ebc7d 100644 --- a/Topology/README.md +++ b/Topology/README.md @@ -10,7 +10,7 @@ should likely be handled by a higher level driver and not make the spec more dif The following molecule specification is used. The required fields are: - - `symbols` (list) - A list of strings + - `symbols` (list) - A list of strings - `geometry` (list) - A 3N XYZ coordinate list of list in bohr, will likely change to encompass decided unit specifications The following are optional fields and default values (option, more a list of possibilities QM programs would want): @@ -23,15 +23,15 @@ The following are optional fields and default values (option, more a list of pos - `comment` (str) - Any additional comment one would attach to the molecule. - `fragments` (list of tuples, `[]`) - A list of indices (0-indexed) for molecular fragments within the topology. - `fragment_charges` (list of floats, `[]`) - A list of charges associated with the fragments tuple. - - `fragment_multiplicities` (list of ints, `[]`) - A list of multiplicites associated with each fragment. + - `fragment_multiplicities` (list of ints, `[]`) - A list of multiplicites associated with each fragment. 
+ - `fix_com` (bool) - whether to adjust to the molecule to the COM or not + - `fix_orientation` (bool) - whether to rotate the molecule to a standard orientation or not - `provenance` (dict, `{}`) - The provencance of the molecule. - `doi` - A doi reference for the molecule. Other possible quantities: - Bonds - Holding data for MM computations - - Basis Sets per atom - - `fix_com` (bool) - whether to adjust to the molecule to the COM or not - - `fix_orientation` (bool) - whether to rotate the molecule to a standard orientation or not + - Basis Sets per atom - label (list of str) - Per-atom labels which may be seperate from fragments - Extend the `real` quantitity to cover real, ghost, absent, qm/mm region, etc. - - EFP quantities `fragment_types`, `coordinate_hints`. This is an example and likely not part of the spec. How would we handle this? + - EFP quantities `fragment_types`, `coordinate_hints`. This is an example and likely not part of the spec. How would we handle this? From d4fc8f5d9fe15c528b11218ff0b7cab47b4723f8 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Wed, 21 Mar 2018 17:48:04 -0400 Subject: [PATCH 05/18] Reformats error definition --- schema/dev/definitions/error.json | 18 ++++---- schema/dev/molecule.json | 68 ++++++++++++++++++++++++++++++- tests/test_helpers.py | 4 +- 3 files changed, 77 insertions(+), 13 deletions(-) diff --git a/schema/dev/definitions/error.json b/schema/dev/definitions/error.json index 381f419..d2659ac 100644 --- a/schema/dev/definitions/error.json +++ b/schema/dev/definitions/error.json @@ -1,19 +1,15 @@ { "properties": { - "creator": { - "description": "The name of the person or program who created this object.", - "type": "string" - }, - "version": { - "description": "The version of the program which created this object, blank otherwise.", - "type": "string" + "error_type": { + "description": "The type of error raised.", + "enum": ["convergence_error", "file_error", "memory_error"] }, - "routine": { - "description": 
"The routine of the program which created this object, blank otherwise.", + "error_message": { + "description": "A description of the raised error.", "type": "string" } }, - "required": ["creator"], + "required": ["error_type", "error_message"], "description": "The type of error message raised.", - "additionalProperties": true + "additionalProperties": false } diff --git a/schema/dev/molecule.json b/schema/dev/molecule.json index 936a46c..7fb28d9 100644 --- a/schema/dev/molecule.json +++ b/schema/dev/molecule.json @@ -1,4 +1,5 @@ { + "$schema": "http://json-schema.org/draft-04/schema#", "properties": { "symbols": { "description": "The atom symbol for each atom in the molecule.", @@ -14,11 +15,76 @@ "type": "number" } }, + "masses": { + "description": "The mass of the molecule, canonical weights assumed if not given.", + "type": "array", + "items": { + "type": "number" + } + }, + "name": { + "description": "The name of the molecule.", + "type": "string" + }, + "comment": { + "description": "Any additional comment one would attach to the molecule.", + "type": "string" + }, + "charge": { + "description": "The overall charge of the molecule.", + "type": "number" + }, + "multiplicity": { + "description": "The overall mulitiplicity of the molecule.", + "type": "number", + "multipleOf": 1.0 + }, + "real": { + "description": "A list describing if the atoms are real or ghost.", + "type": "array", + "items": { + "type": "boolean" + } + }, + "fragments": { + "description": + "A list of indices (0-indexed) for molecular fragments within the topology.", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "number", + "multipleOf": 1.0 + } + } + }, + "fragment_charges": { + "description": "A list of charges associated with the fragments tuple.", + "type": "array", + "items": { + "type": "number" + } + }, + "fragment_multiplicities": { + "description": "A list of multiplicites associated with each fragment.", + "type": "array", + "items": { + "type": 
"number", + "multipleOf": 1.0 + } + }, + "fix_com": { + "description": "Whether to adjust to the molecule to the COM or not.", + "type": "boolean" + }, + "fix_orientation": { + "description": "Whether to rotate the molecule to a standard orientation or not.", + "type": "boolean" + }, "provenance": { "type": "object", "$ref": "#/definitions/provenance" } - }, "required": ["symbols", "geometry"], "description": "The physical cartesian representation of the molecular system" diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 92922a7..1d3614f 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -23,7 +23,9 @@ def _read_json_file(*filename): # Generate the schema quickly (super hacky change later) os.chdir(_schema_path) -subprocess.call(["python", "build_dev_schema.py"]) +ret = subprocess.call(["python", "build_dev_schema.py"]) +if ret > 0: + raise ValueError("Development schema failed to build") os.chdir(_test_path) # Dictionary of known schema versions From a748f51e7a818711e0d45ed67734baa23a876cb3 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Tue, 27 Mar 2018 10:46:32 -0400 Subject: [PATCH 06/18] Starts the move to a small py repo for organizational purposes --- {schema => qc_schema}/build_dev_schema.py | 0 {schema => qc_schema}/dev/base.json | 0 {schema => qc_schema}/dev/definitions/error.json | 0 {schema => qc_schema}/dev/definitions/provenance.json | 0 {schema => qc_schema}/dev/molecule.json | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename {schema => qc_schema}/build_dev_schema.py (100%) rename {schema => qc_schema}/dev/base.json (100%) rename {schema => qc_schema}/dev/definitions/error.json (100%) rename {schema => qc_schema}/dev/definitions/provenance.json (100%) rename {schema => qc_schema}/dev/molecule.json (100%) diff --git a/schema/build_dev_schema.py b/qc_schema/build_dev_schema.py similarity index 100% rename from schema/build_dev_schema.py rename to qc_schema/build_dev_schema.py diff --git a/schema/dev/base.json 
b/qc_schema/dev/base.json similarity index 100% rename from schema/dev/base.json rename to qc_schema/dev/base.json diff --git a/schema/dev/definitions/error.json b/qc_schema/dev/definitions/error.json similarity index 100% rename from schema/dev/definitions/error.json rename to qc_schema/dev/definitions/error.json diff --git a/schema/dev/definitions/provenance.json b/qc_schema/dev/definitions/provenance.json similarity index 100% rename from schema/dev/definitions/provenance.json rename to qc_schema/dev/definitions/provenance.json diff --git a/schema/dev/molecule.json b/qc_schema/dev/molecule.json similarity index 100% rename from schema/dev/molecule.json rename to qc_schema/dev/molecule.json From 9d5faadc94eb4de8141297692ac9fbfe15936c6d Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Tue, 27 Mar 2018 10:46:51 -0400 Subject: [PATCH 07/18] Adds MP and SCF properties --- qc_schema/dev/properties/mp_properties.py | 38 ++++++++++++ qc_schema/dev/properties/scf_properties.py | 70 ++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 qc_schema/dev/properties/mp_properties.py create mode 100644 qc_schema/dev/properties/scf_properties.py diff --git a/qc_schema/dev/properties/mp_properties.py b/qc_schema/dev/properties/mp_properties.py new file mode 100644 index 0000000..75ef35b --- /dev/null +++ b/qc_schema/dev/properties/mp_properties.py @@ -0,0 +1,38 @@ +""" +The complete list of Moller-Plesset properties. +""" + +mp_properties = {} + +mp_properties["mp2_same_spin_correlation_energy"] = { + "type": + "number", + "description": + "The unscaled portion of the MP2 correlation energy from same-spin or triplet doubles correlations." +} + +mp_properties["mp2_opposite_spin_correlation_energy"] = { + "type": + "number", + "description": + "The unscaled portion of the MP2 correlation energy from opposite-spin or singlet doubles correlations." 
+} + +mp_properties["mp2_single_energy"] = { + "type": "number", + "description": "The singles portion of the MP2 correlation energy. Zero except in ROHF." +} + +mp_properties["mp2_double_energy"] = { + "type": + "number", + "description": + "The doubles portion of the MP2 correlation energy including same-spin and opposite-spin correlations." +} + +mp_properties['mp2_total_correlation_energy'] = { + "type": + "number", + "description": + "The doubles portion of the MP2 correlation energy including same-spin and opposite-spin correlations." +} diff --git a/qc_schema/dev/properties/scf_properties.py b/qc_schema/dev/properties/scf_properties.py new file mode 100644 index 0000000..6434c19 --- /dev/null +++ b/qc_schema/dev/properties/scf_properties.py @@ -0,0 +1,70 @@ +""" +The complete list of SCF level properties. +""" + +scf_properties = {} + +scf_properties["scf_one_electron_energy"] = { + "description": "The one-electron energy contribution [H] to the total SCF energy.", + "type": "number" +} + +scf_properties["scf_two_electron_energy"] = { + "type": "number", + "description": "The two-electron energy contribution [H] to the total SCF energy." +} + +scf_properties["nuclear_repulsion_energy"] = { + "type": + "number", + "description": + """ +The nuclear repulsion energy contribution [H] to the total SCF energy. +.. 
+.. math:: E_{NN} = \sum_{i, j < i}^{N_{atom}} \frac{Z_i Z_j}{|\mathbf{R}_i - \mathbf{R}_j|} Date: Tue, 27 Mar 2018 12:25:57 -0400 Subject: [PATCH 08/18] Continue refactoring to a git module Continue to move to repo snapshot Finishes transition to project Finishes moving to a module finished modularizing --- qc_schema/__init__.py | 7 +++ qc_schema/build_dev_schema.py | 40 ------------- qc_schema/data/__init__.py | 3 + qc_schema/dev/__init__.py | 1 + qc_schema/dev/base.json | 21 ------- qc_schema/dev/definitions.py | 41 ++++++++++++++ qc_schema/dev/definitions/error.json | 15 ----- qc_schema/dev/definitions/provenance.json | 19 ------- qc_schema/dev/dev_schema.py | 59 ++++++++++++++++++++ qc_schema/dev/{molecule.json => molecule.py} | 20 +++++-- qc_schema/dev/properties/__init__.py | 1 + qc_schema/dev/properties/properties_base.py | 17 ++++++ qc_schema/dev/properties/scf_properties.py | 2 +- qc_schema/validate.py | 15 +++++ qc_schema/versions.py | 45 +++++++++++++++ tests/simple/helium_dimer_hf.json | 29 ++-------- tests/test_helpers.py | 46 +-------------- tests/test_schema.py | 6 +- 18 files changed, 215 insertions(+), 172 deletions(-) create mode 100644 qc_schema/__init__.py delete mode 100644 qc_schema/build_dev_schema.py create mode 100644 qc_schema/data/__init__.py create mode 100644 qc_schema/dev/__init__.py delete mode 100644 qc_schema/dev/base.json create mode 100644 qc_schema/dev/definitions.py delete mode 100644 qc_schema/dev/definitions/error.json delete mode 100644 qc_schema/dev/definitions/provenance.json create mode 100644 qc_schema/dev/dev_schema.py rename qc_schema/dev/{molecule.json => molecule.py} (89%) create mode 100644 qc_schema/dev/properties/__init__.py create mode 100644 qc_schema/dev/properties/properties_base.py create mode 100644 qc_schema/validate.py create mode 100644 qc_schema/versions.py diff --git a/qc_schema/__init__.py b/qc_schema/__init__.py new file mode 100644 index 0000000..ca5ffda --- /dev/null +++ b/qc_schema/__init__.py @@ -0,0 +1,7 @@ +""" +The main init functions of the QC Schema project 
+""" + +from . import dev +from .validate import validate +from .versions import list_versions, get_schema diff --git a/qc_schema/build_dev_schema.py b/qc_schema/build_dev_schema.py deleted file mode 100644 index a130bf6..0000000 --- a/qc_schema/build_dev_schema.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -A simple program to construct the input and ouput Quantum Chemistry Schema's -from the development branch -""" -import os -import json -import glob - -def read_json(filename): - with open(filename, "r") as infile: - data = json.load(infile) - return data - -# Load in the base JSON -with open("dev/base.json") as infile: - schema = json.load(infile) - -# Add the definitions together -for def_file in glob.glob("dev/definitions/*"): - def_json = read_json(def_file) - name = os.path.basename(def_file).replace(".json", "") - - schema["definitions"][name] = def_json - -# Load in larger pieces like molecule and variables spec -for base_spec in ["molecule"]: - def_json = read_json("dev/" + base_spec + ".json") - schema["properties"][base_spec] = def_json - -# Write out the input and output specs -input_required = ["molecule", "driver", "keywords"] -output_required = input_required + ["provenance", "properties", "error", "success", "raw_output"] - -for prefix, required in zip(["input", "output"], [input_required, output_required]): - schema["required"] = required - with open(prefix + "_qc_schema.schema", "w") as outfile: - json.dump(schema, outfile) - -# Print the output spec for prosperity -# print(json.dumps(schema, indent=2)) diff --git a/qc_schema/data/__init__.py b/qc_schema/data/__init__.py new file mode 100644 index 0000000..f2e314d --- /dev/null +++ b/qc_schema/data/__init__.py @@ -0,0 +1,3 @@ +""" +Full versions of the schema will be inserted here. 
+""" diff --git a/qc_schema/dev/__init__.py b/qc_schema/dev/__init__.py new file mode 100644 index 0000000..9e3369a --- /dev/null +++ b/qc_schema/dev/__init__.py @@ -0,0 +1 @@ +from .dev_schema import input_dev_schema, output_dev_schema diff --git a/qc_schema/dev/base.json b/qc_schema/dev/base.json deleted file mode 100644 index 0ddaa96..0000000 --- a/qc_schema/dev/base.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-06/schema#", - "description": "The MolSSI Quantum Chemistry Schema", - "type": "object", - "version": "0.1.dev", - "properties": { - "molecule": null, - "driver": { - "enum": ["energy", "gradient", "hessian", "property"] - }, - "keywords": { - "type": "object" - }, - "provenance": { - "type": "object", - "$ref": "#/definitions/provenance" - } - }, - "required": null, - "definitions": {} -} diff --git a/qc_schema/dev/definitions.py b/qc_schema/dev/definitions.py new file mode 100644 index 0000000..b9879a2 --- /dev/null +++ b/qc_schema/dev/definitions.py @@ -0,0 +1,41 @@ +""" +A list of definitions involved in the JSON schema. 
+""" + +definitions = {} + +definitions["error"] = { + "properties": { + "error_type": { + "description": "The type of error raised.", + "enum": ["convergence_error", "file_error", "memory_error"] + }, + "error_message": { + "description": "A description of the raised error.", + "type": "string" + } + }, + "required": ["error_type", "error_message"], + "description": "The type of error message raised.", + "additionalProperties": False +} + +definitions["provenance"] = { + "properties": { + "creator": { + "description": "The name of the person or program who created this object.", + "type": "string" + }, + "version": { + "description": "The version of the program which created this object, blank otherwise.", + "type": "string" + }, + "routine": { + "description": "The routine of the program which created this object, blank otherwise.", + "type": "string" + } + }, + "required": ["creator"], + "description": "A short provenance of the object.", + "additionalProperties": True +} diff --git a/qc_schema/dev/definitions/error.json b/qc_schema/dev/definitions/error.json deleted file mode 100644 index d2659ac..0000000 --- a/qc_schema/dev/definitions/error.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "properties": { - "error_type": { - "description": "The type of error raised.", - "enum": ["convergence_error", "file_error", "memory_error"] - }, - "error_message": { - "description": "A description of the raised error.", - "type": "string" - } - }, - "required": ["error_type", "error_message"], - "description": "The type of error message raised.", - "additionalProperties": false -} diff --git a/qc_schema/dev/definitions/provenance.json b/qc_schema/dev/definitions/provenance.json deleted file mode 100644 index dd241e7..0000000 --- a/qc_schema/dev/definitions/provenance.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "properties": { - "creator": { - "description": "The name of the person or program who created this object.", - "type": "string" - }, - "version": { - "description": "The 
version of the program which created this object, blank otherwise.", - "type": "string" - }, - "routine": { - "description": "The routine of the program which created this object, blank otherwise.", - "type": "string" - } - }, - "required": ["creator"], - "description": "A short provenance of the object.", - "additionalProperties": true -} diff --git a/qc_schema/dev/dev_schema.py b/qc_schema/dev/dev_schema.py new file mode 100644 index 0000000..d951a48 --- /dev/null +++ b/qc_schema/dev/dev_schema.py @@ -0,0 +1,59 @@ +""" +Integrates all components of the QC Schema into a single one. +""" + +import copy + +from . import molecule +from . import definitions +from . import properties + +# The base schema definition +base_schema = { + "$schema": "http://json-schema.org/draft-06/schema#", + "description": "The MolSSI Quantum Chemistry Schema", + "type": "object", + "version": "0.1.dev", + "properties": { + "molecule": molecule.molecule, + "driver": { + "definition": "The type of computation requested", + "enum": ["energy", "gradient", "hessian", "property"] + }, + "keywords": { + "type": "object" + }, + "provenance": { + "type": "object", + "$ref": "#/definitions/provenance" + } + }, + "required": ["molecule", "driver", "keywords"], + "definitions": definitions.definitions +} + +# Additional properties to contain in the output +output_properties = { + "properties": properties.properties, + "success": { + "type": "boolean" + }, + "error": { + "type": "object", + "$ref": "#/definitions/error" + }, +} + +# Snapshot the input dev schema +input_dev_schema = copy.deepcopy(base_schema) + +# Add additional output pieces +base_schema["properties"].update(output_properties) +base_schema["required"].extend(["provenance", "properties", "success"]) + +# Snapshot the input dev schema +output_dev_schema = copy.deepcopy(base_schema) + +#import json +#print(json.dumps(input_dev_schema, indent=2)) +#print(json.dumps(output_dev_schema, indent=2)) diff --git a/qc_schema/dev/molecule.json 
b/qc_schema/dev/molecule.py similarity index 89% rename from qc_schema/dev/molecule.json rename to qc_schema/dev/molecule.py index 7fb28d9..d8c9473 100644 --- a/qc_schema/dev/molecule.json +++ b/qc_schema/dev/molecule.py @@ -1,4 +1,7 @@ -{ +""" +The json-schema for the Molecule definition +""" +molecule = { "$schema": "http://json-schema.org/draft-04/schema#", "properties": { "symbols": { @@ -32,12 +35,14 @@ }, "charge": { "description": "The overall charge of the molecule.", - "type": "number" + "type": "number", + "default": 0.0 }, "multiplicity": { "description": "The overall mulitiplicity of the molecule.", "type": "number", - "multipleOf": 1.0 + "multipleOf": 1.0, + "default": 1 }, "real": { "description": "A list describing if the atoms are real or ghost.", @@ -49,7 +54,8 @@ "fragments": { "description": "A list of indices (0-indexed) for molecular fragments within the topology.", - "type": "array", + "type": + "array", "items": { "type": "array", "items": { @@ -75,11 +81,13 @@ }, "fix_com": { "description": "Whether to adjust to the molecule to the COM or not.", - "type": "boolean" + "type": "boolean", + "default": False }, "fix_orientation": { "description": "Whether to rotate the molecule to a standard orientation or not.", - "type": "boolean" + "type": "boolean", + "default": False }, "provenance": { "type": "object", diff --git a/qc_schema/dev/properties/__init__.py b/qc_schema/dev/properties/__init__.py new file mode 100644 index 0000000..a55f7c3 --- /dev/null +++ b/qc_schema/dev/properties/__init__.py @@ -0,0 +1 @@ +from .properties_base import properties diff --git a/qc_schema/dev/properties/properties_base.py b/qc_schema/dev/properties/properties_base.py new file mode 100644 index 0000000..578a635 --- /dev/null +++ b/qc_schema/dev/properties/properties_base.py @@ -0,0 +1,17 @@ +""" +The base file for QC Schema properties. 
+""" + +from .scf_properties import scf_properties +from .mp_properties import mp_properties + +properties = { + "type": "object", + "properties": {}, + "description": "The resulting properties of a computation", + "additionalProperties": False +} + +# Update new keys +properties["properties"].update(scf_properties) +properties["properties"].update(mp_properties) diff --git a/qc_schema/dev/properties/scf_properties.py b/qc_schema/dev/properties/scf_properties.py index 6434c19..1a10ff3 100644 --- a/qc_schema/dev/properties/scf_properties.py +++ b/qc_schema/dev/properties/scf_properties.py @@ -44,7 +44,7 @@ """ } -scf_properties["scf_dipole"] = { +scf_properties["scf_dipole_moment"] = { "type": "array", "description": "The X, Y, and Z dipole components.", "type": "array", diff --git a/qc_schema/validate.py b/qc_schema/validate.py new file mode 100644 index 0000000..8466699 --- /dev/null +++ b/qc_schema/validate.py @@ -0,0 +1,15 @@ +""" +Schema validation tools +""" + +import jsonschema + +from . import versions + +def validate(data, schema_type, version="dev"): + """ + Validates a given input for a schema input and output type. + """ + schema = versions.get_schema(schema_type, version) + + jsonschema.validate(data, schema) diff --git a/qc_schema/versions.py b/qc_schema/versions.py new file mode 100644 index 0000000..a0984ac --- /dev/null +++ b/qc_schema/versions.py @@ -0,0 +1,45 @@ +""" +A simple program to construct the input and ouput Quantum Chemistry Schema's +from the development branch +""" + +from . import dev +from . 
import data + +_schema_input_dict = {} +_schema_output_dict = {} + +# Hooks for non-dev versions +# _schema_input_dict.update(data.input_schemas) +# _schema_output_dict.update(data.output_schemas) + +# Add in dev schema + +_schema_input_dict["dev"] = dev.input_dev_schema +_schema_output_dict["dev"] = dev.output_dev_schema + +# Double check all of the keys are correctly entered +assert _schema_input_dict.keys() == _schema_output_dict.keys() + +def list_versions(): + """ + Lists all current JSON schema versions. + """ + return list(_schema_input_dict) + +def get_schema(schema_type, version="dev"): + """ + Returns the requested schema (input or output) for a given version number. + """ + + if version not in _schema_input_dict: + raise KeyError("Schema version %s not found." % version) + + + if schema_type.lower() == "input": + return _schema_input_dict[version] + + elif schema_type.lower() == "output": + return _schema_output_dict[version] + else: + raise KeyError("Schema type should either be 'input' or 'output', given: %s." 
% schema_type) diff --git a/tests/simple/helium_dimer_hf.json b/tests/simple/helium_dimer_hf.json index f48b98a..c0426bc 100644 --- a/tests/simple/helium_dimer_hf.json +++ b/tests/simple/helium_dimer_hf.json @@ -24,29 +24,12 @@ "basis": "sto-3g" }, "properties": { - "SCF N ITERS": 2.0, - "dipole_moment": { - "x": 0.0, - "y": 0.0, - "z": 0.0, - "units": "debye" - }, - "SCF TOTAL ENERGY": { - "val": -5.433191881443323, - "units": "hartree" - }, - "SCF TWO-ELECTRON ENERGY": { - "val": 4.124089347186247, - "units": "hartree" - }, - "NUCLEAR REPULSION ENERGY": { - "val": 2.11670883436, - "units": "hartree" - }, - "ONE-ELECTRON ENERGY": { - "val": -11.67399006298957, - "units": "hartree" - } + "scf_iterations": 2, + "scf_dipole_moment": [0.0, 0.0, 0.0], + "scf_total_energy": -5.433191881443323, + "scf_one_electron_energy": -11.67399006298957, + "scf_two_electron_energy": 4.124089347186247, + "nuclear_repulsion_energy": 2.11670883436 }, "error": "", "success": true, diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 1d3614f..78fbf07 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -9,6 +9,8 @@ import json import subprocess +import qc_schema + def _read_json_file(*filename): filename = os.path.join(*filename) @@ -19,37 +21,7 @@ def _read_json_file(*filename): # Find a few required relative paths _test_path = os.path.dirname(os.path.abspath(__file__)) _base_path = os.path.dirname(_test_path) -_schema_path = os.path.join(_base_path, "schema") - -# Generate the schema quickly (super hacky change later) -os.chdir(_schema_path) -ret = subprocess.call(["python", "build_dev_schema.py"]) -if ret > 0: - raise ValueError("Development schema failed to build") -os.chdir(_test_path) - -# Dictionary of known schema versions -_input_schemas = {} -_output_schemas = {} -_schema_versions = ["dev"] - -# Pull in dev schema -_input_schemas["dev"] = _read_json_file(_schema_path, "input_qc_schema.schema") -_output_schemas["dev"] = _read_json_file(_schema_path, 
"output_qc_schema.schema") -# Pull in previous versions -for version_path in glob.glob(os.path.join(_schema_path, "v*")): - version = os.path.basename(version_path) - - _input_schemas[version] = _read_json_file(version_path, "input_qc_schema.schema") - _output_schemas[version] = _read_json_file(version_path, "output_qc_schema.schema") - _schema_versions.append(version) - -def list_versions(): - """ - Lists all schema versions detected by the tests. - """ - return copy.deepcopy(_schema_versions) def test_folder(): """ @@ -69,17 +41,3 @@ def list_tests(folder): def get_test(name): return _read_json_file(_test_path + name) - -def find_schema(version, schema_type): - """ - Returns the appropriate schema version. - """ - - if schema_type == "input": - return _input_schemas[version] - elif schema_type == "output": - return _output_schemas[version] - else: - raise KeyError("schema_type can only be either 'input' or 'output', found %s" % schema_type) - - diff --git a/tests/test_schema.py b/tests/test_schema.py index f16d552..e00dabb 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -6,15 +6,15 @@ import os import test_helpers +import qc_schema # Loop over all tests that should pass the tests -@pytest.mark.parametrize("version", test_helpers.list_versions()) +@pytest.mark.parametrize("version", qc_schema.list_versions()) @pytest.mark.parametrize("testfile", test_helpers.list_tests("simple")) def test_schema_validation(version, testfile): - schema = test_helpers.find_schema(version, "output") example = test_helpers.get_test(testfile) - jsonschema.validate(example, schema) + qc_schema.validate(example, "output") From 1163ae87220c975a973e594391c05f3d8b97cfb1 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Tue, 27 Mar 2018 16:11:11 -0400 Subject: [PATCH 09/18] Starts documentation --- docs/Makefile | 21 +++++ docs/README.md | 16 ++++ docs/make.bat | 36 +++++++ docs/source/conf.py | 160 ++++++++++++++++++++++++++++++++ docs/source/index.rst | 70 ++++++++++++++ 
docs/source/spec_components.rst | 68 ++++++++++++++ docs/source/tech_specs.rst | 111 ++++++++++++++++++++++ 7 files changed, 482 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/README.md create mode 100644 docs/make.bat create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/spec_components.rst create mode 100644 docs/source/tech_specs.rst diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..edc7d7e --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = qc_schema +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..2505698 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,16 @@ +# Compiling QC_JSON_Schema's Documentation + +The docs for this project are built with [Sphinx](http://www.sphinx-doc.org/en/master/). +To compile the docs, first ensure that Sphinx and the ReadTheDocs theme are installed. + +```bash +pip install sphinx sphinx_rtd_theme +``` + +Once installed, you can use the `Makefile` in this directory to compile static HTML pages by +```bash +make html +``` + +The compiled docs will be in the `_build` directory and can be viewed by opening `index.html` (which may itself +be inside a directory called `html/` depending on what version of Sphinx is installed). 
diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..7dd12d8 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=_build +set SPHINXPROJ=qc_schema + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..d12ec5d --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/stable/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'A schema for Quantum Chemistry' +copyright = "2018, The Molecular Sciences Software Institute" +author = 'The Molecular Sciences Software Institute' + +# The short X.Y version +version = '' +# The full version, including alpha/beta/rc tags +release = '' + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.mathjax', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'QC_JSON_Schemadoc' + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'QC_JSON_Schema.tex', 'QC_JSON_Schema Documentation', + 'QC_JSON_Schema', 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). 
+man_pages = [ + (master_doc, 'QC_JSON_Schema', 'QC_JSON_Schema Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'QC_JSON_Schema', 'QC_JSON_Schema Documentation', + author, 'QC_JSON_Schema', 'A schema for Quantum Chemistry', + 'Miscellaneous'), +] + + +# -- Extension configuration ------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..7e6e44b --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,70 @@ +.. QC_JSON_Schema documentation master file, created by + sphinx-quickstart on Thu Mar 15 13:55:56 2018. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Quantum Chemistry Schema +========================================================= +A JSON Schema for Quantum Chemistry + + +The purpose of this schema is to provide API like access to pre-existing quantum +chemistry packages to enable more complex workflows. The core of this is to +avoid parsing ASCII-based output files and place output variables, vectors, +matrices in a consistent format that can be easily parsed. + +High Level Aspirations +---------------------- +In order to help define the overall scope and direction of the specification several high level goals will be pursued: + + * Ability to connect to visualizers and GUI's + * Connect to existing Workflows tools + * Transfer data between QM programs (Orbitals, Densities, etc) + * Provide a rigorous record of computation for large scale QM databases + * Provide a framework for QM API access + +A concrete list of requirements for this schema can be found [here](Requirements.md). 
+ +**Organizations:** + * `The Molecular Sciences Software Institute `_ + +**Visualizers:** + - `Avogadro `_ + - `Molecular Design Toolkit `_ + - `VTK `_ + +**Quantum Chemistry Engines:** + - `MPQC `_ + - `NWChem `_ + - `Psi4 `_ + +**Translators:** + - `cclib `_ + + +Existing JSON Efforts +---------------------- +The existing efforts listed below are all relevant to this proposed spec. The idea is to pull from this diverse group and coalesce into a +single specification to prevent duplication of effort. + + * `Autodesk JSON `_ + * `BAGEL JSON `_ + * `Chemical JSON `_ + * `MPQC JSON `_ + * `NWChem JSON `_ + * `Psi4 JSON `_ + * `PyQC Schema `_ + * `Molpro Database XML `_ + * `Chemical Markup Language `_ + + +Contents +-------- + +.. toctree:: + :maxdepth: 1 + :caption: Contents + + spec_components + tech_specs + diff --git a/docs/source/spec_components.rst b/docs/source/spec_components.rst new file mode 100644 index 0000000..df3fefe --- /dev/null +++ b/docs/source/spec_components.rst @@ -0,0 +1,68 @@ +Specification Components +======================== + +The JSON format is a general container, and the current work is primarily +aimed at developing a standard on top of the `JSON `_ (and +`BSON `_) format for chemical data. + +Purpose +------- + +The purpose here is to document the format, provide an `open +specification `_, establish +what is required or optional, and to provide a living specification as we extend +the format. This could reuse some of the previous work done in the `CML +format `_ for XML, and Chemical JSON and NWChem-JSON, and other +JSON/XML formats that have been used by codes. + +Input Components +---------------- + +Topology +~~~~~~~~ + +The closest representation to the real physical nature of the system. In +practical terms, for molecular sciences, this is the coordinates (in some form) +and the elements/Z-number at that coordinate. For both QM and MM, this is your +molecule. This may include bonding information and unit cell, lattice +parameters, etc, as well. 
+ +This is the foundation upon which you build the model basis of your +calculation. + +Driver +~~~~~~ + +What you are looking to calculate: energy, gradient, Hessian, or property. + +Model +~~~~~ + +The overall mathematical model we are using for our calculation. Another way to +think about this is the largest superset that still obtains roughly the same +result. For example, Direct and Disk-based Hartree-Fock at different Schwarz +thresholds could be the same "method". However, density-fitted, LinK, or +Cholesky-based Hartree-Fock should be separate methods. + +In QM, this is the Hamiltonian (HF, DFT, ...) combined with the overall basis of +the calculation. An example in QM would be HF/STO-3G or B3LYP/6-311G**. Custom +basis sets can be handled with custom keywords. + +Keywords +~~~~~~~~ + +Various tunable parameters for the calculation. These vary widely, depending on +the basis and model chemistry. Currently, these are each individual program's own keywords. + +Output Components +----------------- + + * Repeat of input components + * Driver return - Return of the requested driver (energy/gradient/etc) + * Properties - Other properties/values constructed as by-products of the computation + * Provenance - Code, computer, user information, actual settings used by the code (lots + of defaults when not specified, that do change over code versions, and are different between codes). + * Raw Output - If requested, the canonical domain specific ASCII output + * Skipped Input Fields - If the input allows pass through of other fields, print the skipped ones + * Errors - If the computation failed the raised error should go here. 
+ diff --git a/docs/source/tech_specs.rst b/docs/source/tech_specs.rst new file mode 100644 index 0000000..70843c6 --- /dev/null +++ b/docs/source/tech_specs.rst @@ -0,0 +1,111 @@ +Technical Specifications +======================== +This document collects technical considerations, both those that are still open and those that have been discussed and closed. + +Open Questions +-------------- + +How do we reference other objects? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +JSON does not directly support object references. This makes it non-trivial to, +say, maintain a list of bonds between atoms. Some solutions are: + + 1) by array index (e.g., :code:`residue.atom_indices=[3,4,5,6]`) + 2) by JSON path reference (see, e.g., https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03) + 3) JSON-LD allows some flexibility of referencing. Also gives flexibility to break one document + or one JSON object into pieces that can be referenced against. + 4) by a unique key. (e.g., :code:`residue.id='a83nd83'`, :code:`residue.atoms=['a9n3d9', '31di3']`) + +Array index is probably the best option - although they are a little fragile, +they're no more fragile than path references, and require far less overhead +than unique keys. + +We need to look at this beyond atoms and bonds. Especially in workflows we can reuse pieces of data +from previous tasks in the workflow. Instead of repeating we can use referencing. + +See also: http://stackoverflow.com/q/4001474/1958900 + +How do we uniquely specify physical units? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For instance, velocity might be "angstrom/fs". Alternatives: + + 1) Require units in the form {:code:`unit_name:exponent`}, e.g. 
:code:`atom.velocity.units={'angstrom':1, 'fs':-1}` + 2) Allow strings of the form :code:`atom.velocity.units="angstrom/fs"`, but require that units be chosen from a specific list of specifications + 3) Allow strings of the form :code:`atom.velocity.units="angstrom/fs"`, and require file parsers to parse the units according to a specified syntax + + BDJ Note: There are multiple standards specifications for units, and conversions. If done right in a schema, you can use JSON-LD to + link to the actual standards definition. Some examples of what I have done, which aligns with CML: + +.. code:: python + + "orbitalEnergy": {"units": "Hartree", "value": 0.935524} + "shieldingAnisotropy": {"units": "ppm","value": 17.5292} + + +JSON and HDF5 +~~~~~~~~~~~~~ + +The object specifications in this document are tailored to JSON, but can be +easily stored in an HDF5 file as well. HDF5 is, like JSON, hierarchical and +self-describing. These similarities make it easy to perform 1-to-1 +transformations between well-formed JSON and a corresponding HDF5 +representation. + +Unlike JSON, HDF5 is binary and requires custom libraries to read, but has far +better performance and storage characteristics for numerical data. We will +provide tools to easily interconvert files between JSON and HDF5. Applications +that support this format should always provide JSON support; ones that require +high performance should also support the HDF5 variant. + +Closed Questions +---------------- + +Store large collections of objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +How do we store large lists of objects (such as lists of atoms or bonds)? + + 1) As a table of values (these values do very well in HDF5 as well) + 2) As a set of arrays + 3) As a list of objects (works well in HDF5 too, would be worth bringing in an HDF5 expert) + +Examples: + +.. 
code:: python + + // 1) Storing fields as tables: creates an mmCIF/PDB-like layout + {atoms={type:'table[atom]', + fields=['name', 'atomic_number', 'mass/Dalton', 'residue_index', 'position/angstrom', 'momentum/angstrom*amu*fs^-1'] + entries=[ + ['CA', 6, 12.0, 0, [0.214,12.124,1.12], [0,0,0]], + ['N', 7, 14.20, 0, [0.214,12.124,1.12], [0,0,0]], + ...} + + // 2) Storing fields as arrays: much more compact, but harder to read and edit + {num_atoms=1234, + atoms={names:['CA','CB','OP' ...], + atomic_numbers:[6,6,8, ...], + masses:{val:[12.0, 12.0, 16.12, ...], units:'amu'}, + residue_indices:[0,0,0,1,1, ...], + positions:{val:[[0.214,12.124,1.12], [0.214,12.124,1.12], ...], units:'angstrom'}, + momenta:{val:[[0,0,0], [1,2,3], ...], units:'angstrom*amu*fs^-1'} + } + + // 3) Storing the fieldnames for each atom: readable, but makes the file huge + {atoms=[ + {name:'CA', atnum:6, residue_index:0, + mass:{value:12.00, units:'Daltons'}, + position:{value:[0.214,12.124,1.12], units:'angstroms'}, + momentum:{value:[0.0, 0.0, 0.0], units:'angstrom*dalton*fs^-1'}, + }, + {name:'N', atnum:7, residue_index:0, + mass:{value:14.20, units:'Daltons'}, + position:{value:[0.214,12.124,1.12], units:'angstroms'}, + momentum:{value:[0.0, 0.0, 0.0], units:'angstrom*dalton*fs^-1'}, + }, + ... 
+ }] + } + + From 0010693297ead22667238c8cf0c4c1c84b2333ac Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Tue, 27 Mar 2018 16:37:34 -0400 Subject: [PATCH 10/18] Starts the generation of schema docs --- .gitignore | 1 + docs/source/conf.py | 6 ++ docs/source/gen_schema_docs.py | 71 ++++++++++++++++++++++ docs/source/index.rst | 6 ++ qc_schema/dev/properties/scf_properties.py | 3 +- setup.py | 31 ++++++++++ 6 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 docs/source/gen_schema_docs.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index 1a5e080..117394d 100644 --- a/.gitignore +++ b/.gitignore @@ -106,3 +106,4 @@ ENV/ # Dev schema schema/input_qc_schema.schema schema/output_qc_schema.schema +docs/source/auto*rst diff --git a/docs/source/conf.py b/docs/source/conf.py index d12ec5d..5ed947a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,6 +19,12 @@ # -- Project information ----------------------------------------------------- +# Super hacky auto gen +import sys +import os +sys.path.insert(1, os.path.dirname(__file__)) +import gen_schema_docs + project = 'A schema for Quantum Chemistry' copyright = "2018, The Molecular Sciences Software Institute" author = 'The Molecular Sciences Software Institute' diff --git a/docs/source/gen_schema_docs.py b/docs/source/gen_schema_docs.py new file mode 100644 index 0000000..07df8d5 --- /dev/null +++ b/docs/source/gen_schema_docs.py @@ -0,0 +1,71 @@ +""" +Very hacky way to write out the schema (for demo purposes only) +""" +import qc_schema + +scf_props = qc_schema.dev.properties.scf_properties.scf_properties +mp_props = qc_schema.dev.properties.mp_properties.mp_properties + +def write_header(data, header): + data.append("") + data.append(header) + data.append("-" * len(header)) + data.append("") + +def write_line_items(data, key, item): + data.append("") + data.append(key) + data.append("~" * len(key)) + data.append("") + if "description" in item: + 
data.append(item["description"]) + else: + data.append("No description available") + data.append("") + + +prop_file = ["Schema Properties"] +prop_file.append("=" * len(prop_file[-1])) + +intro = """ +A list of valid quantum chemistry properties tracked by the schema. +""" + +prop_file.extend(intro.split()) + +# Write out SCF properties +write_header(prop_file, "SCF Properties") + +for key, value in scf_props.items(): + write_line_items(prop_file, key, value) + +# Write out MP properties +write_header(prop_file, "Moller-Plesset Properties") + +for key, value in mp_props.items(): + write_line_items(prop_file, key, value) + +# Write out the file +with open("auto_props.rst", "w") as outfile: + outfile.write("\n".join(prop_file)) + + +### Write out Topology + +top_file = ["Schema Topology"] +top_file.append("=" * len(top_file[-1])) + +intro = """ +A list of valid quantum chemistry properties tracked by the schema. +""" + +top_file.extend(intro.split()) + +topo_props = qc_schema.dev.molecule.molecule["properties"] + +for key, value in topo_props.items(): + write_line_items(top_file, key, value) + +# Write out the file +with open("auto_topology.rst", "w") as outfile: + outfile.write("\n".join(top_file)) diff --git a/docs/source/index.rst b/docs/source/index.rst index 7e6e44b..32a89c9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -68,3 +68,9 @@ Contents spec_components tech_specs +.. toctree:: + :maxdepth: 1 + :caption: Schema + + auto_topology + auto_props diff --git a/qc_schema/dev/properties/scf_properties.py b/qc_schema/dev/properties/scf_properties.py index 1a10ff3..7997791 100644 --- a/qc_schema/dev/properties/scf_properties.py +++ b/qc_schema/dev/properties/scf_properties.py @@ -18,8 +18,9 @@ "type": "number", "description": - """ + r""" The nuclear repulsion energy contribution [H] to the total SCF energy. + .. 
math:: E_{NN} = \sum_{i, j < i} \frac{Z_i Z_j}{|\mathbf{R}_i - \mathbf{R}_j|} Date: Wed, 28 Mar 2018 09:20:28 -0400 Subject: [PATCH 11/18] MP2 total energy and travis fix --- .gitignore | 5 ----- .travis.yml | 1 + qc_schema/dev/molecule.py | 4 ++-- qc_schema/dev/properties/mp_properties.py | 7 +++++++ 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 117394d..20cef25 100644 --- a/.gitignore +++ b/.gitignore @@ -100,10 +100,5 @@ ENV/ # mypy .mypy_cache/ -# Vim leftover -*.swp - # Dev schema -schema/input_qc_schema.schema -schema/output_qc_schema.schema docs/source/auto*rst diff --git a/.travis.yml b/.travis.yml index 91cccda..b206302 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,7 @@ before_install: - python -V - pip install --upgrade pip setuptools - pip install pytest jsonschema + - pip install -e . script: - py.test -v diff --git a/qc_schema/dev/molecule.py b/qc_schema/dev/molecule.py index d8c9473..fbbe2b5 100644 --- a/qc_schema/dev/molecule.py +++ b/qc_schema/dev/molecule.py @@ -19,7 +19,7 @@ } }, "masses": { - "description": "The mass of the molecule, canonical weights assumed if not given.", + "description": "The masses of the atoms in the molecule, canonical weights assumed if not given.", "type": "array", "items": { "type": "number" @@ -39,7 +39,7 @@ "default": 0.0 }, "multiplicity": { - "description": "The overall mulitiplicity of the molecule.", + "description": "The overall multiplicity of the molecule.", "type": "number", "multipleOf": 1.0, "default": 1 diff --git a/qc_schema/dev/properties/mp_properties.py b/qc_schema/dev/properties/mp_properties.py index 75ef35b..ef7161a 100644 --- a/qc_schema/dev/properties/mp_properties.py +++ b/qc_schema/dev/properties/mp_properties.py @@ -36,3 +36,10 @@ "description": "The doubles portion of the MP2 correlation energy including same-spin and opposite-spin correlations." 
} + +mp_properties['mp2_total_energy'] = { + "type": + "number", + "description": + "The total MP2 energy (MP2 correlation energy + HF energy)." 
+} From f7f7ab12987c5e1cbe91c9363f33a2e4dcb73e06 Mon Sep 17 00:00:00 2001 From: Eric Berquist Date: Wed, 28 Mar 2018 10:05:18 -0400 Subject: [PATCH 12/18] Top-level Makefile --- .gitignore | 1 + Makefile | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 Makefile diff --git a/.gitignore b/.gitignore index 20cef25..edbbb4a 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ nosetests.xml coverage.xml *.cover .hypothesis/ +.pytest_cache/ # Translations *.mo diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7ce3ea3 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +.PHONY: install +install: + pip install -e . + +.PHONY: test +test: + pytest -v + +.PHONY: docs +docs: + cd docs && make html From 3106cc276b123f5fb77cdb9f61ae8dfb60c4a747 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Wed, 28 Mar 2018 12:14:59 -0400 Subject: [PATCH 13/18] Adds several validation failure tests --- .../missing_molecule_symbols.json | 18 +++++++++ tests/input_failures/multiplicity_float.json | 23 +++++++++++ tests/output_failures/dipole_wrong_type.json | 38 +++++++++++++++++++ tests/output_failures/unknown_property.json | 38 +++++++++++++++++++ tests/test_failures.py | 38 +++++++++++++++++++ 5 files changed, 155 insertions(+) create mode 100644 tests/input_failures/missing_molecule_symbols.json create mode 100644 tests/input_failures/multiplicity_float.json create mode 100644 tests/output_failures/dipole_wrong_type.json create mode 100644 tests/output_failures/unknown_property.json create mode 100644 tests/test_failures.py diff --git a/tests/input_failures/missing_molecule_symbols.json b/tests/input_failures/missing_molecule_symbols.json new file mode 100644 index 0000000..4cbca15 --- /dev/null +++ b/tests/input_failures/missing_molecule_symbols.json @@ -0,0 +1,18 @@ +{ + "molecule": { + "geometry": [ + 0, + 0, + 0, + 0, + 0, + 1 + ] + }, + "driver": "energy", + "keywords": { + "method": "SCF", + "basis": "sto-3g" + } +} + diff --git 
a/tests/input_failures/multiplicity_float.json b/tests/input_failures/multiplicity_float.json new file mode 100644 index 0000000..bb2280b --- /dev/null +++ b/tests/input_failures/multiplicity_float.json @@ -0,0 +1,23 @@ +{ + "molecule": { + "geometry": [ + 0, + 0, + 0, + 0, + 0, + 1 + ], + "symbols": [ + "He", + "He" + ], + "multiplicity": 5.5 + }, + "driver": "energy", + "keywords": { + "method": "SCF", + "basis": "sto-3g" + } +} + diff --git a/tests/output_failures/dipole_wrong_type.json b/tests/output_failures/dipole_wrong_type.json new file mode 100644 index 0000000..d93f183 --- /dev/null +++ b/tests/output_failures/dipole_wrong_type.json @@ -0,0 +1,38 @@ +{ + "provenance": { + "creator": "My QM Program", + "version": "1.1rc1", + "database": "https://pqr.pitt.edu/mol/HEFNNWSXXWATRW-JTQLQIEISA-N" + }, + "molecule": { + "geometry": [ + 0, + 0, + 0, + 0, + 0, + 1 + ], + "symbols": [ + "He", + "He" + ] + }, + "driver": "energy", + "keywords": { + "method": "SCF", + "basis": "sto-3g" + }, + "properties": { + "scf_iterations": 2, + "scf_dipole_moment": 0.0, + "scf_total_energy": -5.433191881443323, + "scf_one_electron_energy": -11.67399006298957, + "scf_two_electron_energy": 4.124089347186247, + "nuclear_repulsion_energy": 2.11670883436 + }, + "error": "", + "success": true, + "raw_output": "Output storing was not requested." 
+} + diff --git a/tests/output_failures/unknown_property.json b/tests/output_failures/unknown_property.json new file mode 100644 index 0000000..78b1a1a --- /dev/null +++ b/tests/output_failures/unknown_property.json @@ -0,0 +1,38 @@ +{ + "provenance": { + "creator": "My QM Program", + "version": "1.1rc1", + "database": "https://pqr.pitt.edu/mol/HEFNNWSXXWATRW-JTQLQIEISA-N" + }, + "molecule": { + "geometry": [ + 0, + 0, + 0, + 0, + 0, + 1 + ], + "symbols": [ + "He", + "He" + ] + }, + "driver": "energy", + "keywords": { + "method": "SCF", + "basis": "sto-3g" + }, + "properties": { + "scf_something": 2, + "scf_dipole_moment": 0.0, + "scf_total_energy": -5.433191881443323, + "scf_one_electron_energy": -11.67399006298957, + "scf_two_electron_energy": 4.124089347186247, + "nuclear_repulsion_energy": 2.11670883436 + }, + "error": "", + "success": true, + "raw_output": "Output storing was not requested." +} + diff --git a/tests/test_failures.py b/tests/test_failures.py new file mode 100644 index 0000000..5c44eab --- /dev/null +++ b/tests/test_failures.py @@ -0,0 +1,38 @@ +""" +Tests the JSON schema +""" +import jsonschema +import pytest +import os + +import test_helpers +import qc_schema + +### Test input validation errors +input_failures = test_helpers.list_tests("input_failures") +input_failure_ids = [x.split("/")[-1].replace(".json", "") for x in input_failures] + +# Loop over all tests that should pass the tests +@pytest.mark.parametrize("testfile", input_failures, ids=input_failure_ids) +@pytest.mark.parametrize("version", qc_schema.list_versions()) +def test_input_failures(version, testfile): + + example = test_helpers.get_test(testfile) + + with pytest.raises(jsonschema.exceptions.ValidationError): + qc_schema.validate(example, "input") + +### Test output validation errors +output_failures = test_helpers.list_tests("output_failures") +output_failure_ids = [x.split("/")[-1].replace(".json", "") for x in output_failures] + +# Loop over all tests that should pass the 
tests +@pytest.mark.parametrize("testfile", output_failures, ids=output_failure_ids) +@pytest.mark.parametrize("version", qc_schema.list_versions()) +def test_output_failures(version, testfile): + + example = test_helpers.get_test(testfile) + + with pytest.raises(jsonschema.exceptions.ValidationError): + qc_schema.validate(example, "output") + From 12e018836cbea8e5c9974fe9c16d441dbbe6956a Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Wed, 28 Mar 2018 13:20:31 -0400 Subject: [PATCH 14/18] Writes topology keys out as a table --- docs/source/gen_schema_docs.py | 77 ++++++++++++++++++++-- qc_schema/dev/properties/scf_properties.py | 8 +-- 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/docs/source/gen_schema_docs.py b/docs/source/gen_schema_docs.py index 07df8d5..13a9d5e 100644 --- a/docs/source/gen_schema_docs.py +++ b/docs/source/gen_schema_docs.py @@ -23,6 +23,7 @@ def write_line_items(data, key, item): data.append("No description available") data.append("") +### Schema Properties prop_file = ["Schema Properties"] prop_file.append("=" * len(prop_file[-1])) @@ -50,21 +51,83 @@ def write_line_items(data, key, item): outfile.write("\n".join(prop_file)) -### Write out Topology +### Schema Topology top_file = ["Schema Topology"] top_file.append("=" * len(top_file[-1])) -intro = """ -A list of valid quantum chemistry properties tracked by the schema. -""" - -top_file.extend(intro.split()) +top_file.extend(""" +A full description of the overall molecule its geometry, fragments, and charges. +""".splitlines()) topo_props = qc_schema.dev.molecule.molecule["properties"] +topo_req = qc_schema.dev.molecule.molecule["required"] + +table_widths = [27, 80, 20] +fmt_string = ' | {:%s} | {:%s} | {:%s} |' % tuple(table_widths) +dash_inds = tuple("-" * w for w in table_widths) +equals_inds = tuple("=" * w for w in table_widths) + +write_header(top_file, "Required Keys") + +top_file.extend(""" +The following properties are required for a topology. 
+ +""".splitlines()) + +top_file.append(" +-{}-+-{}-+-{}-+".format(*dash_inds)) +top_file.append(fmt_string.format("Key Name", "Description", "Field Type")) +top_file.append(" +={}=+={}=+={}=+".format(*equals_inds)) + +for key in topo_req: + value = topo_props[key] + + dtype = value["type"] + + if value["type"] == "object": + description = value["$ref"] + else: + description = value["description"] + + if value["type"] == "array": + dtype = "array of " + value["items"]["type"] + "s" + + if len(description) >= table_widths[1]: + while len(description) > 0: + top_file.append(fmt_string.format(key, description, dtype)) + top_file.append(" +-{}-+-{}-+-{}-+".format(*dash_inds)) + else: + top_file.append(fmt_string.format(key, description, dtype)) + top_file.append(" +-{}-+-{}-+-{}-+".format(*dash_inds)) + +# Optional properties +write_header(top_file, "Optional Keys") + +top_file.extend(""" +The following keys are optional for the topology specification. + +""".splitlines()) + +top_file.append(" +-{}-+-{}-+-{}-+".format(*dash_inds)) +top_file.append(fmt_string.format("Key Name", "Description", "Field Type")) +top_file.append(" +={}=+={}=+={}=+".format(*equals_inds)) for key, value in topo_props.items(): - write_line_items(top_file, key, value) + if key in topo_req: + continue + + dtype = value["type"] + + if value["type"] == "object": + description = value["$ref"] + else: + description = value["description"] + + if value["type"] == "array": + dtype = "array of " + value["items"]["type"] + "s" + + top_file.append(fmt_string.format(key, description, dtype)) + top_file.append(" +-{}-+-{}-+-{}-+".format(*dash_inds)) # Write out the file with open("auto_topology.rst", "w") as outfile: diff --git a/qc_schema/dev/properties/scf_properties.py b/qc_schema/dev/properties/scf_properties.py index 7997791..6fd7cd9 100644 --- a/qc_schema/dev/properties/scf_properties.py +++ b/qc_schema/dev/properties/scf_properties.py @@ -5,13 +5,13 @@ scf_properties = {} 
scf_properties["scf_one_electron_energy"] = { - "description": "The one-electron energy contribution [H] to the total SCF energy.", + "description": "The one-electron energy contribution to the total SCF energy.", "type": "number" } scf_properties["scf_two_electron_energy"] = { "type": "number", - "description": "The two-electron energy contribution [H] to the total SCF energy." + "description": "The two-electron energy contribution to the total SCF energy." } scf_properties["nuclear_repulsion_energy"] = { @@ -19,7 +19,7 @@ "number", "description": r""" -The nuclear repulsion energy contribution [H] to the total SCF energy. +The nuclear repulsion energy contribution to the total SCF energy. .. math:: E_{NN} = \sum_{i, j Date: Thu, 29 Mar 2018 10:04:25 -0400 Subject: [PATCH 15/18] Updates travis to build docs via make --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b206302..6f93b17 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,10 @@ before_install: - pip install -e . 
script: - - py.test -v + - make test + +after_success: + - make docs notifications: email: false From 8f9217005d21c981b0d78f8e4809bdb4b81d5832 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Fri, 30 Mar 2018 09:21:23 -0400 Subject: [PATCH 16/18] Minor tweaks from Lori's feedback --- qc_schema/dev/molecule.py | 2 +- qc_schema/dev/properties/mp_properties.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/qc_schema/dev/molecule.py b/qc_schema/dev/molecule.py index fbbe2b5..e59ed4c 100644 --- a/qc_schema/dev/molecule.py +++ b/qc_schema/dev/molecule.py @@ -12,7 +12,7 @@ } }, "geometry": { - "description": "The 3N XYZ coordinates of the atoms involved.", + "description": "The (3N, ) vector of XYZ coordinates of the atoms.", "type": "array", "items": { "type": "number" diff --git a/qc_schema/dev/properties/mp_properties.py b/qc_schema/dev/properties/mp_properties.py index ef7161a..5ac41ec 100644 --- a/qc_schema/dev/properties/mp_properties.py +++ b/qc_schema/dev/properties/mp_properties.py @@ -18,12 +18,12 @@ "The unscaled portion of the MP2 correlation energy from opposite-spin or singlet doubles correlations." } -mp_properties["mp2_single_energy"] = { +mp_properties["mp2_singles_energy"] = { "type": "number", "description": "The singles portion of the MP2 correlation energy. Zero except in ROHF." 
} -mp_properties["mp2_double_energy"] = { +mp_properties["mp2_doubles_energy"] = { "type": "number", "description": From 585e1325109c96bb330d199296c4b22acfce6938 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Fri, 30 Mar 2018 11:00:22 -0400 Subject: [PATCH 17/18] Magic 'header', multi description, and more partners --- docs/source/index.rst | 2 ++ qc_schema/dev/dev_schema.py | 16 +++++++++++++--- qc_schema/dev/molecule.py | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 32a89c9..805d504 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -32,6 +32,7 @@ A concrete list of requirements for this schema can be found [here](Requirements - `Avogadro `_ - `Molecular Design Toolkit `_ - `VTK `_ + - `Jmol / JSmol `_ **Quantum Chemistry Engines:** - `MPQC `_ @@ -40,6 +41,7 @@ A concrete list of requirements for this schema can be found [here](Requirements **Translators:** - `cclib `_ + - `openbabel `_ Existing JSON Efforts diff --git a/qc_schema/dev/dev_schema.py b/qc_schema/dev/dev_schema.py index d951a48..fca315c 100644 --- a/qc_schema/dev/dev_schema.py +++ b/qc_schema/dev/dev_schema.py @@ -11,9 +11,11 @@ # The base schema definition base_schema = { "$schema": "http://json-schema.org/draft-06/schema#", + "name": "QC_JSON", + "version": "0.1.dev", + "url": "http://schema_host.org/schemas/v0.1/something.schema", "description": "The MolSSI Quantum Chemistry Schema", "type": "object", - "version": "0.1.dev", "properties": { "molecule": molecule.molecule, "driver": { @@ -24,8 +26,16 @@ "type": "object" }, "provenance": { - "type": "object", - "$ref": "#/definitions/provenance" + "anyOf": [{ + "type": "object", + "$ref": "#/definitions/provenance" + }, { + "type": "array", + "items": { + "type": "object", + "$ref": "#/definitions/provenance" + } + }] } }, "required": ["molecule", "driver", "keywords"], diff --git a/qc_schema/dev/molecule.py b/qc_schema/dev/molecule.py index e59ed4c..a50dcd2 
100644 --- a/qc_schema/dev/molecule.py +++ b/qc_schema/dev/molecule.py @@ -80,7 +80,7 @@ } }, "fix_com": { - "description": "Whether to adjust to the molecule to the COM or not.", + "description": "Whether to adjust to the molecule to the center of mass or not.", "type": "boolean", "default": False }, From a21678ab4f85cd8bb8afb73936f209162bf30c07 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Fri, 30 Mar 2018 11:28:07 -0400 Subject: [PATCH 18/18] Updates the tech specs rst to reflect the current big/small discussion --- docs/source/tech_specs.rst | 65 +++++++++------------- qc_schema/dev/properties/scf_properties.py | 2 +- 2 files changed, 26 insertions(+), 41 deletions(-) diff --git a/docs/source/tech_specs.rst b/docs/source/tech_specs.rst index 70843c6..55fd0ef 100644 --- a/docs/source/tech_specs.rst +++ b/docs/source/tech_specs.rst @@ -15,7 +15,7 @@ say, maintain a list of bonds between atoms. Some solutions are: 2) by JSON path reference (see, e.g., https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03) 3) JSON-LD allows some flexibility of referencing. Also gives flexibility to break one document or one JSON object into pieces that can be referenced against. - 4) by a unique key. (e.g., :code:`residue.id='a83nd83'`, `residue.atoms=['a9n3d9', '31di3']`) + 4) by a unique key. (e.g., :code:`residue.id='a83nd83'`, :code:`residue.atoms=['a9n3d9', '31di3']`) Array index is probably the best option - although they are a little fragile, they're no more fragile than path references, and require far less overhead @@ -35,8 +35,8 @@ For instance, velocity might be "angstrom/fs" Alternatives: 2) Allow strings of the form :code:`atom.velocity.units="angstrom/fs"`, but require that units be chosen from a specific list of specifications 3) Allow strings of the form :code:`atom.velocity.units="angstrom/fs"`, and require file parsers to parse the units according to a specified syntax - BDJ Note: There are multiple standards specifications for units, and conversions. 
If done right in a schema, you can use JSON-LD to - link to the actual standards definition. Some examples of what I have done, which aligns with CML: + Note: There are multiple standards specifications for units, and conversions. If done right in a schema, you can use JSON-LD to + link to the actual standards definition. Some examples in CML: .. code:: python @@ -64,48 +64,33 @@ Closed Questions Store large collections of objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -How do we store large lists of objects (such as lists of atoms or bonds?) +There exist multiple ways to arrange data which represents objects. These expressions come down to two primary categories: - 1) As a table of values (these values do very well in HDF5 as well) - 2) As a set of arrays - 3) As a list of objects (works well in HDF5 too, would be worth brining in a HDF5 expert) +The "big" approach where each field is a flat (1D) array for each category: -Examples: +.. code:: python + + { + "symbols": ["C", "C", ...], + "geometry": [0.000, 1.396, 0.000, 1.209, 0.698, 0.000, ...], + "masses": [12.017, 12.017, ...] + } +The "small" approach which has a closer object-based mapping: + .. 
code:: python - // 1) Storing fields as tables: creates an mmCIF/PDB-like layout - {atoms={type:'table[atom]', - fields=['name', 'atomic_number', 'mass/Dalton', 'residue_index', 'position/angstrom', 'momentum/angstrom*amu*fs^-1'] - entries=[ - ['CA', 6, 12.0, 0, [0.214,12.124,1.12], [0,0,0]], - ['N', 7, 14.20, 0, [0.214,12.124,1.12], [0,0,0]], - ...} - - // 2) Storing fields as arrays: much more compact, but harder to read and edit - {num_atoms=1234, - atoms={names:['CA','CB','OP' ...], - atomic_numbers:[6,6,8, ...], - masses:{val:[12.0, 12.0, 16.12, ...], units:'amu'}, - residue_indices:[0,0,0,1,1, ...], - positions:{val:[[0.214,12.124,1.12], [0.214,12.124,1.12], ...], units:'angstrom'}, - momenta:{val:[[0,0,0], [1,2,3], ...], units:'angstrom*amu*fs^-1'} - } - - // 3) Storing the fieldnames for each atom: readable, but makes the file huge - {atoms=[ - {name:'CA', atnum:6, residue_index:0, - mass:{value:12.00, units:'Daltons'}, - position:{value:[0.214,12.124,1.12], units:'angstroms'}, - momentum:{value:[0.0, 0.0, 0.0], units:'angstrom*dalton*fs^-1'}, - }, - {name:'N', atnum:7, residue_index:0, - mass:{value:14.20, units:'Daltons'}, - position:{value:[0.214,12.124,1.12], units:'angstroms'}, - momentum:{value:[0.0, 0.0, 0.0], units:'angstrom*dalton*fs^-1'}, - }, - ... - }] + { + "fields": ["symbols", "geometry", "masses"], + "table": [ + ["C", [0.000, 1.396, 0.000], 12.017], + ["C", [1.209, 0.698, 0.000], 12.017], + ... + ] } +For the QC Schema it was decided to follow the big approach as it has the following benefits: + - Serialization/deserialization is much faster due to the smaller number of objects generated. + - The "small" approach can lead to a complex hierarchy of fields. + - It is generally thought the "big" approach is more straightforward to program due to its flatter structure. 
diff --git a/qc_schema/dev/properties/scf_properties.py b/qc_schema/dev/properties/scf_properties.py index 6fd7cd9..655f951 100644 --- a/qc_schema/dev/properties/scf_properties.py +++ b/qc_schema/dev/properties/scf_properties.py @@ -5,7 +5,7 @@ scf_properties = {} scf_properties["scf_one_electron_energy"] = { - "description": "The one-electron energy contribution to the total SCF energy.", + "description": "The one-electron (core Hamiltonian) energy contribution to the total SCF energy.", "type": "number" }