Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add validator to check whether anonymous and reduced formulae are reduced #914

Merged
merged 2 commits into from
Aug 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 30 additions & 1 deletion optimade/models/structures.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# pylint: disable=no-self-argument,line-too-long,no-name-in-module
import re
import warnings
import sys
import math
from functools import reduce
from enum import IntEnum, Enum
from sys import float_info
from typing import List, Optional, Union
Expand Down Expand Up @@ -874,7 +877,7 @@ def check_anonymous_formula(cls, v):
elements = tuple(re.findall(r"[A-Z][a-z]*", v))
numbers = [int(n.strip()) for n in re.split(r"[A-Z][a-z]*", v) if n.strip()]

if any(n == 1 for n in numbers):
if 1 in numbers:
raise ValueError(
f"'chemical_formula_anonymous' {v} must omit proportion '1'"
)
Expand All @@ -893,6 +896,32 @@ def check_anonymous_formula(cls, v):

return v

@validator("chemical_formula_anonymous", "chemical_formula_reduced")
def check_reduced_formulae(cls, value, field):
if value is None:
return value

numbers = [n.strip() or 1 for n in re.split(r"[A-Z][a-z]*", value)]
ml-evs marked this conversation as resolved.
Show resolved Hide resolved
# Need to remove leading 1 from split and convert to ints
numbers = [int(n) for n in numbers[1:]]

if 0 in numbers:
raise ValueError(
f"{field.name} {value!r} cannot contain chemical proportion of 0."
)

if sys.version_info[1] >= 9:
gcd = math.gcd(*numbers)
else:
gcd = reduce(math.gcd, numbers)
ml-evs marked this conversation as resolved.
Show resolved Hide resolved

if gcd != 1:
raise ValueError(
f"{field.name} {value!r} is not properly reduced: greatest common divisor was {gcd}, expected 1."
)

return value

@validator("elements", each_item=True)
def element_must_be_chemical_symbol(cls, v):
if v not in CHEMICAL_SYMBOLS:
Expand Down
7 changes: 7 additions & 0 deletions optimade/validator/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" This module contains the ImplementationValidator class and corresponding command line tools. """
# pylint: disable=import-outside-toplevel
from optimade import __version__, __api_version__
from .validator import ImplementationValidator
from .utils import DEFAULT_CONN_TIMEOUT

Expand Down Expand Up @@ -98,6 +99,12 @@ def validate(): # pragma: no cover
help="Additional HTTP headers to use for each request, specified as a JSON object.",
)

parser.add_argument(
ml-evs marked this conversation as resolved.
Show resolved Hide resolved
"--version",
action="version",
version=f"optimade-validator running from optimade-python-tools v{__version__} which implements OPTIMADE specification v{__api_version__}.",
)

parser.add_argument(
"--timeout",
type=float,
Expand Down
15 changes: 6 additions & 9 deletions optimade/validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,12 +321,6 @@ def validate_implementation(self):
entry_info_endpoint
)

# Use the _entry_info_by_type to construct filters on the relevant endpoints
if not self.minimal:
for endp in self.available_json_endpoints:
self._log.debug("Testing queries on JSON entry endpoint of %s", endp)
self._recurse_through_endpoint(endp)

# Test that the results from multi-entry-endpoints obey, e.g. page limits,
# and that all entries can be deserialized with the patched models.
# These methods also set the test_ids for each type of entry, which are validated
Expand All @@ -340,6 +334,12 @@ def validate_implementation(self):
self._log.debug("Testing single entry request of type %s", endp)
self._test_single_entry_endpoint(endp)

# Use the _entry_info_by_type to construct filters on the relevant endpoints
if not self.minimal:
for endp in self.available_json_endpoints:
self._log.debug("Testing queries on JSON entry endpoint of %s", endp)
self._recurse_through_endpoint(endp)

# Test that the links endpoint can be serialized correctly
self._log.debug("Testing %s endpoint", CONF.links_endpoint)
self._test_info_or_links_endpoint(CONF.links_endpoint)
Expand Down Expand Up @@ -998,9 +998,6 @@ def _test_multi_entry_endpoint(self, endp: str) -> None:
"""Requests and deserializes a multi-entry endpoint with the
appropriate model.

TODO: deserialization is currently classed as an optional
test until our models are robust to support levels.

Parameters:
request_str: The multi-entry request to make, e.g.,
"structures?filter=nsites<10"
Expand Down
16 changes: 16 additions & 0 deletions tests/models/test_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,22 @@ def test_bad_structures(bad_structures, mapper):
{"chemical_formula_reduced": "Ag6 Cl2"},
"chemical_formula_reduced\n string does not match regex",
),
(
{"chemical_formula_reduced": "Ge2Si2"},
"chemical_formula_reduced 'Ge2Si2' is not properly reduced: greatest common divisor was 2, expected 1.",
),
(
{"chemical_formula_reduced": "Ge144Si60V24"},
"chemical_formula_reduced 'Ge144Si60V24' is not properly reduced: greatest common divisor was 12, expected 1.",
),
(
{"chemical_formula_anonymous": "A10B5C5"},
"chemical_formula_anonymous 'A10B5C5' is not properly reduced: greatest common divisor was 5, expected 1.",
),
(
{"chemical_formula_anonymous": "A44B15C9D4E3F2GHI0J0K0L0"},
"chemical_formula_anonymous 'A44B15C9D4E3F2GHI0J0K0L0' cannot contain chemical proportion of 0.",
),
)


Expand Down