diff --git a/.gitignore b/.gitignore index 7bbc71c..edbbb4a 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ nosetests.xml coverage.xml *.cover .hypothesis/ +.pytest_cache/ # Translations *.mo @@ -99,3 +100,6 @@ ENV/ # mypy .mypy_cache/ + +# Dev schema +docs/source/auto*rst diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..6f93b17 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,30 @@ +# After changing this file, check it on: +# http://lint.travis-ci.org/ +language: python + +# Run jobs on container-based infrastructure, can be overridden per job +sudo: false + +matrix: + include: + - python: 2.7 + - python: 3.5 + +before_install: + - uname -a + - free -m + - df -h + - ulimit -a + - python -V + - pip install --upgrade pip setuptools + - pip install pytest jsonschema + - pip install -e . + +script: + - make test + +after_success: + - make docs + +notifications: + email: false diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7ce3ea3 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +.PHONY: install +install: + pip install -e . + +.PHONY: test +test: + pytest -v + +.PHONY: docs +docs: + cd docs && make html diff --git a/Topology/README.md b/Topology/README.md index 1f50dec..74ebc7d 100644 --- a/Topology/README.md +++ b/Topology/README.md @@ -10,7 +10,7 @@ should likely be handled by a higher level driver and not make the spec more dif The following molecule specification is used. The required fields are: - - `symbols` (list) - A list of strings + - `symbols` (list) - A list of strings - `geometry` (list) - A 3N XYZ coordinate list of list in bohr, will likely change to encompass decided unit specifications The following are optional fields and default values (option, more a list of possibilities QM programs would want): @@ -23,15 +23,15 @@ The following are optional fields and default values (option, more a list of pos - `comment` (str) - Any additional comment one would attach to the molecule. 
- `fragments` (list of tuples, `[]`) - A list of indices (0-indexed) for molecular fragments within the topology. - `fragment_charges` (list of floats, `[]`) - A list of charges associated with the fragments tuple. - - `fragment_multiplicities` (list of ints, `[]`) - A list of multiplicites associated with each fragment. + - `fragment_multiplicities` (list of ints, `[]`) - A list of multiplicites associated with each fragment. + - `fix_com` (bool) - whether to adjust to the molecule to the COM or not + - `fix_orientation` (bool) - whether to rotate the molecule to a standard orientation or not - `provenance` (dict, `{}`) - The provencance of the molecule. - `doi` - A doi reference for the molecule. Other possible quantities: - Bonds - Holding data for MM computations - - Basis Sets per atom - - `fix_com` (bool) - whether to adjust to the molecule to the COM or not - - `fix_orientation` (bool) - whether to rotate the molecule to a standard orientation or not + - Basis Sets per atom - label (list of str) - Per-atom labels which may be seperate from fragments - Extend the `real` quantitity to cover real, ghost, absent, qm/mm region, etc. - - EFP quantities `fragment_types`, `coordinate_hints`. This is an example and likely not part of the spec. How would we handle this? + - EFP quantities `fragment_types`, `coordinate_hints`. This is an example and likely not part of the spec. How would we handle this? diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..edc7d7e --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = qc_schema +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..2505698 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,16 @@ +# Compiling QC_JSON_Schema's Documentation + +The docs for this project are built with [Sphinx](http://www.sphinx-doc.org/en/master/). +To compile the docs, first ensure that Sphinx and the ReadTheDocs theme are installed. + +```bash +pip install sphinx sphinx_rtd_theme +``` + +Once installed, you can use the `Makefile` in this directory to compile static HTML pages by +```bash +make html +``` + +The compiled docs will be in the `_build` directory and can be viewed by opening `index.html` (which may itself +be inside a directory called `html/` depending on what version of Sphinx is installed). diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..7dd12d8 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=_build +set SPHINXPROJ=qc_schema + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..5ed947a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,166 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/stable/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +# Super hacky auto gen +import sys +import os +sys.path.insert(1, os.path.dirname(__file__)) +import gen_schema_docs + +project = 'A schema for Quantum Chemistry' +copyright = "2018, The Molecular Sciences Software Institute" +author = 'The Molecular Sciences Software Institute' + +# The short X.Y version +version = '' +# The full version, including alpha/beta/rc tags +release = '' + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.mathjax', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path . +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. 
Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'QC_JSON_Schemadoc' + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'QC_JSON_Schema.tex', 'QC_JSON_Schema Documentation', + 'QC_JSON_Schema', 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'QC_JSON_Schema', 'QC_JSON_Schema Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'QC_JSON_Schema', 'QC_JSON_Schema Documentation', + author, 'QC_JSON_Schema', 'A schema for Quantum Chemistry', + 'Miscellaneous'), +] + + +# -- Extension configuration ------------------------------------------------- diff --git a/docs/source/gen_schema_docs.py b/docs/source/gen_schema_docs.py new file mode 100644 index 0000000..13a9d5e --- /dev/null +++ b/docs/source/gen_schema_docs.py @@ -0,0 +1,134 @@ +""" +Very hacky way to write out the schema (for demo purposes only) +""" +import qc_schema + +scf_props = qc_schema.dev.properties.scf_properties.scf_properties +mp_props = qc_schema.dev.properties.mp_properties.mp_properties + +def write_header(data, header): + """Append ``header`` to ``data`` as a dash-underlined reST title framed by blank lines.""" + data.extend([ + "", header, "-" * len(header), "", + ]) + +def write_line_items(data, key, item): + """Append a tilde-underlined reST title for ``key`` followed by the item's description.""" + data.extend([ + "", + key, + "~" * len(key), + "", + item.get("description", "No description available"), + "", + ]) + +### Schema Properties + +prop_file = ["Schema Properties"] +prop_file.append("=" * len(prop_file[-1])) + +intro = """ +A list of valid quantum chemistry properties tracked by the schema. 
+""" + +prop_file.extend(intro.splitlines())  # keep whole lines, like the other intro blocks; split() emitted one word per line + +# Write out SCF properties +write_header(prop_file, "SCF Properties") + +for key, value in scf_props.items(): + write_line_items(prop_file, key, value) + +# Write out MP properties +write_header(prop_file, "Moller-Plesset Properties") + +for key, value in mp_props.items(): + write_line_items(prop_file, key, value) + +# Write out the file +with open("auto_props.rst", "w") as outfile: + outfile.write("\n".join(prop_file)) + + +### Schema Topology + +top_file = ["Schema Topology"] +top_file.append("=" * len(top_file[-1])) + +top_file.extend(""" +A full description of the overall molecule its geometry, fragments, and charges. +""".splitlines()) + +topo_props = qc_schema.dev.molecule.molecule["properties"] +topo_req = qc_schema.dev.molecule.molecule["required"] + +table_widths = [27, 80, 20] +fmt_string = ' | {:%s} | {:%s} | {:%s} |' % tuple(table_widths) +dash_inds = tuple("-" * w for w in table_widths) +equals_inds = tuple("=" * w for w in table_widths) + +write_header(top_file, "Required Keys") + +top_file.extend(""" +The following properties are required for a topology. 
+ +""".splitlines()) + +def write_table_row(data, key, description, dtype): + """Append one grid-table row; long descriptions wrap onto lines whose key/type cells are blank.""" + import textwrap + wrapped = textwrap.wrap(description, table_widths[1]) or [""] + data.extend(fmt_string.format("" if n else key, text, "" if n else dtype) for n, text in enumerate(wrapped)) + data.append(" +-{}-+-{}-+-{}-+".format(*dash_inds)) + +top_file.append(" +-{}-+-{}-+-{}-+".format(*dash_inds)) +top_file.append(fmt_string.format("Key Name", "Description", "Field Type")) +top_file.append(" +={}=+={}=+={}=+".format(*equals_inds)) + +for key in topo_req: + value = topo_props[key] + + dtype = value["type"] + + if value["type"] == "object": + description = value["$ref"] + else: + description = value["description"] + + if value["type"] == "array": + dtype = "array of " + value["items"]["type"] + "s" + + write_table_row(top_file, key, description, dtype) + +# Optional properties +write_header(top_file, "Optional Keys") + +top_file.extend(""" +The following keys are optional for the topology specification. + +""".splitlines()) + +top_file.append(" +-{}-+-{}-+-{}-+".format(*dash_inds)) +top_file.append(fmt_string.format("Key Name", "Description", "Field Type")) +top_file.append(" +={}=+={}=+={}=+".format(*equals_inds)) + +for key, value in topo_props.items(): + if key in topo_req: + continue + + dtype = value["type"] + + if value["type"] == "object": + description = value["$ref"] + else: + description = value["description"] + + if value["type"] == "array": + dtype = "array of " + value["items"]["type"] + "s" + + write_table_row(top_file, key, description, dtype) + +# Write out the file +with open("auto_topology.rst", "w") as outfile: + outfile.write("\n".join(top_file)) diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..805d504 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,78 @@ +.. QC_JSON_Schema documentation master file, created by + sphinx-quickstart on Thu Mar 15 13:55:56 2018. 
+ You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Quantum Chemistry Schema +========================================================= +A JSON Schema for Quantum Chemistry + + +The purpose of this schema is to provide API-like access to pre-existing quantum +chemistry packages to enable more complex workflows. The core of this is to +avoid parsing ASCII-based output files and place output variables, vectors, +matrices in a consistent format that can be easily parsed. + +High Level Aspirations +---------------------- +In order to help define the overall scope and direction of the specification several high level goals will be pursued: + + * Ability to connect to visualizers and GUIs + * Connect to existing workflow tools + * Transfer data between QM programs (Orbitals, Densities, etc) + * Provide a rigorous record of computation for large scale QM databases + * Provide a framework for QM API access + +A concrete list of requirements for this schema can be found `here <Requirements.md>`_. + +**Organizations:** + * `The Molecular Sciences Software Institute `_ + +**Visualizers:** + - `Avogadro `_ + - `Molecular Design Toolkit `_ + - `VTK `_ + - `Jmol / JSmol `_ + +**Quantum Chemistry Engines:** + - `MPQC `_ + - `NWChem `_ + - `Psi4 `_ + +**Translators:** + - `cclib `_ + - `openbabel `_ + + +Existing JSON Efforts +---------------------- +The efforts listed below are inputs to the proposed spec. The idea is to pull from this diverse group and coalesce into a +single specification to prevent duplication of effort. + + * `Autodesk JSON `_ + * `BAGEL JSON `_ + * `Chemical JSON `_ + * `MPQC JSON `_ + * `NWChem JSON `_ + * `Psi4 JSON `_ + * `PyQC Schema `_ + * `Molpro Database XML `_ + * `Chemical Markup Language `_ + + +Contents +-------- + +.. toctree:: + :maxdepth: 1 + :caption: Contents + + spec_components + tech_specs + +.. 
toctree:: + :maxdepth: 1 + :caption: Schema + + auto_topology + auto_props diff --git a/docs/source/spec_components.rst b/docs/source/spec_components.rst new file mode 100644 index 0000000..df3fefe --- /dev/null +++ b/docs/source/spec_components.rst @@ -0,0 +1,68 @@ +Specification Components +======================== + +The JSON format is a general container, and the current work is primarily +aimed at developing a standard on top of the `JSON `_ (and +`BSON `_) format for chemical data. + +Purpose +------- + +The purpose here is to document the format, provide an `open +specification `_, establish +what is required or optional, and to provide a living specification as we extend +the format. This could reuse some of the previous work done in the `CML +format `_ for XML, and Chemical JSON and NWChem-JSON, and other +JSON/XML formats that have been used by codes. + +Input Components +---------------- + +Topology +~~~~~~~~ + +The closest representation to the real physical nature of the system. In +practical terms, for molecular sciences, this is the coordinates (in some form) +and the elements/Z-number at that coordinate. For both QM and MM, this is your +molecule. This may include bonding information and unit cell, lattice +parameters, etc, as well. + +This is the foundation upon which you build the model basis of your +calculation. + +Driver +~~~~~~ + +What are you looking to calculate: energy, gradient, Hessian, or property. + +Model +~~~~~ + +The overall mathematical model we are using for our calculation. Another way to +think about this is the largest superset that still obtains roughly the same +result. For example, Direct and Disk-based Hartree-Fock at different Schwarz +thresholds could be the same "method". However, density-fitted, LinK, or +Cholesky-based Hartree-Fock should be separate methods. + +In QM, this is the Hamiltonian (HF, DFT, ...) combined with the overall basis of +the calculation. An example in QM would be HF/STO-3G or B3LYP/6-311G**. 
Custom +basis sets can be handled with custom keywords. + +Keywords +~~~~~~~~ + +Various tunable parameters for the calculation. These vary widely, depending on +the basis and model chemistry. These currently represent the individual program's keywords. + +Output Components +----------------- + + * Repeat of input components + * Driver return - Return of the requested driver (energy/gradient/etc) + * Properties - Other properties/values constructed as by-products of the computation + * Provenance - Code, computer, user information, actual settings used by the code (lots + of defaults when not specified, that do change over code versions, and are different between codes). + * Raw Output - If requested, the canonical domain-specific ASCII output + * Skipped Input Fields - If the input allows pass through of other fields print the skipped ones + * Errors - If the computation failed the raised error should go here. + diff --git a/docs/source/tech_specs.rst b/docs/source/tech_specs.rst new file mode 100644 index 0000000..55fd0ef --- /dev/null +++ b/docs/source/tech_specs.rst @@ -0,0 +1,96 @@ +Technical Specifications +======================== +This document collects technical considerations, both those that are still open and those which have been discussed and closed. + +Open Questions +-------------- + +How do we reference other objects? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +JSON does not directly support object references. This makes it non-trivial to, +say, maintain a list of bonds between atoms. Some solutions are: + + 1) by array index (e.g., :code:`residue.atom_indices=[3,4,5,6]`) + 2) by JSON path reference (see, e.g., https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03) + 3) JSON-LD allows some flexibility of referencing. Also gives flexibility to break one document + or one JSON object into pieces that can be referenced against. + 4) by a unique key. 
(e.g., :code:`residue.id='a83nd83'`, :code:`residue.atoms=['a9n3d9', '31di3']`) + +Array index is probably the best option - although they are a little fragile, +they're no more fragile than path references, and require far less overhead +than unique keys. + +We need to look at this beyond atoms and bonds. Especially in workflows we can reuse pieces of data +from previous tasks in the workflow. Instead of repeating we can use referencing. + +See also: http://stackoverflow.com/q/4001474/1958900 + +How do we uniquely specify physical units? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For instance, velocity might be "angstrom/fs" Alternatives: + + 1) Require units in the form {:code:`unit_name:exponent`}, e.g. :code:`atom.velocity.units={'angstrom':1, 'fs':-1}` + 2) Allow strings of the form :code:`atom.velocity.units="angstrom/fs"`, but require that units be chosen from a specific list of specifications + 3) Allow strings of the form :code:`atom.velocity.units="angstrom/fs"`, and require file parsers to parse the units according to a specified syntax + + Note: There are multiple standards specifications for units, and conversions. If done right in a schema, you can use JSON-LD to + link to the actual standards definition. Some examples in CML: + +.. code:: python + + "orbitalEnergy": {"units": "Hartree", "value": 0.935524} + "shieldingAnisotropy": {"units": "ppm","value": 17.5292} + + +JSON and HDF5 +~~~~~~~~~~~~~ + +The object specifications in this document are tailored to JSON, but can be +easily stored in an HDF5 file as well. HDF5 is, like JSON, hierarchical and +self-describing. These similarities make it easy to perform 1-to-1 +transformations between well-formed JSON and a corresponding HDF5 +representation. + +Unlike JSON, HDF5 is binary and requires custom libraries to read, but has far +better performance and storage characteristics for numerical data. We will +provide tools to easily interconvert files between JSON and HDF5. 
Applications +that support this format should always provide JSON support; ones that require +high performance should also support the HDF5 variant. + +Closed Questions +---------------- + +Store large collections of objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There exist multiple ways to arrange data which represents objects. These expressions come down to two primary categories: + +The "big" approach where each field is a flat (1D) array for each category: + +.. code:: python + + { + "symbols": ["C", "C", ...], + "geometry": [0.000, 1.396, 0.000, 1.209, 0.698, 0.000, ...], + "masses": [12.017, 12.017, ...] + } + +The "small" approach which has a closer object-based mapping: + +.. code:: python + + { + "fields": ["symbols", "geometry", "masses"], + "table": [ + ["C", [0.000, 1.396, 0.000], 12.017], + ["C", [1.209, 0.698, 0.000], 12.017], + ... + ] + } + +For the QC Schema it was decided to follow the big approach as it has the following benefits: + - Serialization/deserialization is much faster due to the smaller number of objects generated. + - The "small" approach can lead to a complex hierarchy of fields. + - It is generally thought the "big" approach is more straightforward to program due to its flatter structure. + diff --git a/qc_schema/__init__.py b/qc_schema/__init__.py new file mode 100644 index 0000000..ca5ffda --- /dev/null +++ b/qc_schema/__init__.py @@ -0,0 +1,7 @@ +""" +The main init functions of the QC Schema project +""" + +from . import dev +from .validate import validate +from .versions import list_versions, get_schema diff --git a/qc_schema/data/__init__.py b/qc_schema/data/__init__.py new file mode 100644 index 0000000..f2e314d --- /dev/null +++ b/qc_schema/data/__init__.py @@ -0,0 +1,3 @@ +""" +Full versions of the schema will be inserted here. 
+""" diff --git a/qc_schema/dev/__init__.py b/qc_schema/dev/__init__.py new file mode 100644 index 0000000..9e3369a --- /dev/null +++ b/qc_schema/dev/__init__.py @@ -0,0 +1 @@ +from .dev_schema import input_dev_schema, output_dev_schema diff --git a/qc_schema/dev/definitions.py b/qc_schema/dev/definitions.py new file mode 100644 index 0000000..b9879a2 --- /dev/null +++ b/qc_schema/dev/definitions.py @@ -0,0 +1,41 @@ +""" +A list of definitions involved in the JSON schema. +""" + +definitions = {} + +definitions["error"] = { + "properties": { + "error_type": { + "description": "The type of error raised.", + "enum": ["convergence_error", "file_error", "memory_error"] + }, + "error_message": { + "description": "A description of the raised error.", + "type": "string" + } + }, + "required": ["error_type", "error_message"], + "description": "The type of error message raised.", + "additionalProperties": False +} + +definitions["provenance"] = { + "properties": { + "creator": { + "description": "The name of the person or program who created this object.", + "type": "string" + }, + "version": { + "description": "The version of the program which created this object, blank otherwise.", + "type": "string" + }, + "routine": { + "description": "The routine of the program which created this object, blank otherwise.", + "type": "string" + } + }, + "required": ["creator"], + "description": "A short provenance of the object.", + "additionalProperties": True +} diff --git a/qc_schema/dev/dev_schema.py b/qc_schema/dev/dev_schema.py new file mode 100644 index 0000000..fca315c --- /dev/null +++ b/qc_schema/dev/dev_schema.py @@ -0,0 +1,69 @@ +""" +Integrates all components of the QC Schema into a single one. +""" + +import copy + +from . import molecule +from . import definitions +from . 
import properties + +# The base schema definition +base_schema = { + "$schema": "http://json-schema.org/draft-06/schema#", + "name": "QC_JSON", + "version": "0.1.dev", + "url": "http://schema_host.org/schemas/v0.1/something.schema", + "description": "The MolSSI Quantum Chemistry Schema", + "type": "object", + "properties": { + "molecule": molecule.molecule, + "driver": { + "description": "The type of computation requested", + "enum": ["energy", "gradient", "hessian", "property"] + }, + "keywords": { + "type": "object" + }, + "provenance": { + "anyOf": [{ + "type": "object", + "$ref": "#/definitions/provenance" + }, { + "type": "array", + "items": { + "type": "object", + "$ref": "#/definitions/provenance" + } + }] + } + }, + "required": ["molecule", "driver", "keywords"], + "definitions": definitions.definitions +} + +# Additional properties to contain in the output +output_properties = { + "properties": properties.properties, + "success": { + "type": "boolean" + }, + "error": { + "type": "object", + "$ref": "#/definitions/error" + }, +} + +# Snapshot the input dev schema before the output-only keys are merged in +input_dev_schema = copy.deepcopy(base_schema) + +# Add additional output pieces +base_schema["properties"].update(output_properties) +base_schema["required"].extend(["provenance", "properties", "success"]) + +# Snapshot the output dev schema +output_dev_schema = copy.deepcopy(base_schema) + +#import json +#print(json.dumps(input_dev_schema, indent=2)) +#print(json.dumps(output_dev_schema, indent=2)) diff --git a/qc_schema/dev/molecule.py b/qc_schema/dev/molecule.py new file mode 100644 index 0000000..a50dcd2 --- /dev/null +++ b/qc_schema/dev/molecule.py @@ -0,0 +1,99 @@ +""" +The json-schema for the Molecule definition +""" +molecule = { + "$schema": "http://json-schema.org/draft-04/schema#", + "properties": { + "symbols": { + "description": "The atom symbol for each atom in the molecule.", + "type": "array", + "items": { + "type": "string" + } + }, + "geometry": { + "description": "The (3N, ) 
vector of XYZ coordinates of the atoms.", + "type": "array", + "items": { + "type": "number" + } + }, + "masses": { + "description": "The masses of the atoms in the molecule, canonical weights assumed if not given.", + "type": "array", + "items": { + "type": "number" + } + }, + "name": { + "description": "The name of the molecule.", + "type": "string" + }, + "comment": { + "description": "Any additional comment one would attach to the molecule.", + "type": "string" + }, + "charge": { + "description": "The overall charge of the molecule.", + "type": "number", + "default": 0.0 + }, + "multiplicity": { + "description": "The overall multiplicity of the molecule.", + "type": "number", + "multipleOf": 1.0, + "default": 1 + }, + "real": { + "description": "A list describing if the atoms are real or ghost.", + "type": "array", + "items": { + "type": "boolean" + } + }, + "fragments": { + "description": + "A list of indices (0-indexed) for molecular fragments within the topology.", + "type": + "array", + "items": { + "type": "array", + "items": { + "type": "number", + "multipleOf": 1.0 + } + } + }, + "fragment_charges": { + "description": "A list of charges associated with the fragments tuple.", + "type": "array", + "items": { + "type": "number" + } + }, + "fragment_multiplicities": { + "description": "A list of multiplicites associated with each fragment.", + "type": "array", + "items": { + "type": "number", + "multipleOf": 1.0 + } + }, + "fix_com": { + "description": "Whether to adjust to the molecule to the center of mass or not.", + "type": "boolean", + "default": False + }, + "fix_orientation": { + "description": "Whether to rotate the molecule to a standard orientation or not.", + "type": "boolean", + "default": False + }, + "provenance": { + "type": "object", + "$ref": "#/definitions/provenance" + } + }, + "required": ["symbols", "geometry"], + "description": "The physical cartesian representation of the molecular system" +} diff --git 
a/qc_schema/dev/properties/__init__.py b/qc_schema/dev/properties/__init__.py new file mode 100644 index 0000000..a55f7c3 --- /dev/null +++ b/qc_schema/dev/properties/__init__.py @@ -0,0 +1 @@ +from .properties_base import properties diff --git a/qc_schema/dev/properties/mp_properties.py b/qc_schema/dev/properties/mp_properties.py new file mode 100644 index 0000000..5ac41ec --- /dev/null +++ b/qc_schema/dev/properties/mp_properties.py @@ -0,0 +1,45 @@ +""" +The complete list of Moller-Plesset properties. +""" + +mp_properties = {} + +mp_properties["mp2_same_spin_correlation_energy"] = { + "type": + "number", + "description": + "The unscaled portion of the MP2 correlation energy from same-spin or triplet doubles correlations." +} + +mp_properties["mp2_opposite_spin_correlation_energy"] = { + "type": + "number", + "description": + "The unscaled portion of the MP2 correlation energy from opposite-spin or singlet doubles correlations." +} + +mp_properties["mp2_singles_energy"] = { + "type": "number", + "description": "The singles portion of the MP2 correlation energy. Zero except in ROHF." +} + +mp_properties["mp2_doubles_energy"] = { + "type": + "number", + "description": + "The doubles portion of the MP2 correlation energy including same-spin and opposite-spin correlations." +} + +mp_properties['mp2_total_correlation_energy'] = { + "type": + "number", + "description": + "The total MP2 correlation energy, i.e. the sum of the singles and doubles portions." +} + +mp_properties['mp2_total_energy'] = { + "type": + "number", + "description": + "The total MP2 energy (MP2 correlation energy + HF energy)." +} diff --git a/qc_schema/dev/properties/properties_base.py b/qc_schema/dev/properties/properties_base.py new file mode 100644 index 0000000..578a635 --- /dev/null +++ b/qc_schema/dev/properties/properties_base.py @@ -0,0 +1,17 @@ +""" +The base file for QC Schema properties. 
+""" + +from .scf_properties import scf_properties +from .mp_properties import mp_properties + +properties = { + "type": "object", + "properties": {}, + "description": "The resulting properties of a computation", + "additionalProperties": False +} + +# Update new keys +properties["properties"].update(scf_properties) +properties["properties"].update(mp_properties) diff --git a/qc_schema/dev/properties/scf_properties.py b/qc_schema/dev/properties/scf_properties.py new file mode 100644 index 0000000..655f951 --- /dev/null +++ b/qc_schema/dev/properties/scf_properties.py @@ -0,0 +1,71 @@ +""" +The complete list of SCF level properties. +""" + +scf_properties = {} + +scf_properties["scf_one_electron_energy"] = { + "description": "The one-electron (core Hamiltonina) energy contribution to the total SCF energy.", + "type": "number" +} + +scf_properties["scf_two_electron_energy"] = { + "type": "number", + "description": "The two-electron energy contribution to the total SCF energy." +} + +scf_properties["nuclear_repulsion_energy"] = { + "type": + "number", + "description": + r""" +The nuclear repulsion energy contribution to the total SCF energy. + +.. math:: E_{NN} = \sum_{i, j