Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handling null fields in the filtertransformer and validator #796

Merged
merged 2 commits into from
May 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
* @CasperWA @ml-evs
*mongo* @ml-evs @shyamd
*elastic* @ml-evs @markus1978
* @CasperWA @ml-evs @JPBergsma
*mongo* @CasperWA @JPBergsma @ml-evs @shyamd
*elastic* @CasperWA @JPBergsma @ml-evs @markus1978
8 changes: 5 additions & 3 deletions optimade/filtertransformers/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,11 @@ def _query_op(self, quantity, op, value, nested=None):
return Q(query_type, **{field: value})

if op == "!=":
return ~Q( # pylint: disable=invalid-unary-operand-type
query_type, **{field: value}
)
# != queries must also include an existence check
# Note that for MongoDB, `$exists` will include null-valued fields,
# where as in ES `exists` excludes them.
# pylint: disable=invalid-unary-operand-type
return ~Q(query_type, **{field: value}) & Q("exists", field=field)

def _has_query_op(self, quantities, op, predicate_zip_list):
"""
Expand Down
6 changes: 6 additions & 0 deletions optimade/filtertransformers/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ def expression_phrase(self, arg):
def property_first_comparison(self, arg):
# property_first_comparison: property ( value_op_rhs | known_op_rhs | fuzzy_string_op_rhs | set_op_rhs |
# set_zip_op_rhs | length_op_rhs )

# Awkwardly, MongoDB will match null fields in $ne filters,
# so we need to add a check for null equality in evey $ne query.
if "$ne" in arg[1]:
return {"$and": [{arg[0]: arg[1]}, {arg[0]: {"$ne": None}}]}

return {arg[0]: arg[1]}

def constant_first_comparison(self, arg):
Expand Down
17 changes: 17 additions & 0 deletions optimade/server/data/test_structures.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"_id": {
"$oid": "5cfb441f053b174410700d02"
},
"assemblies": null,
"chemsys": "Ac",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -78,6 +79,7 @@
"_id": {
"$oid": "5cfb441f053b174410700d03"
},
"assemblies": null,
"chemsys": "Ac-Ag-Ir",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -193,6 +195,7 @@
"_id": {
"$oid": "5cfb441f053b174410700d04"
},
"assemblies": null,
"chemsys": "Ac-Ag-Pb",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -308,6 +311,7 @@
"_id": {
"$oid": "5cfb441f053b174410700d18"
},
"assemblies": null,
"chemsys": "Ac-Mg",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -396,6 +400,7 @@
"_id": {
"$oid": "5cfb441f053b174410700d1f"
},
"assemblies": null,
"chemsys": "Ac-O",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -496,6 +501,7 @@
"_id": {
"$oid": "5cfb441f053b174410700d6f"
},
"assemblies": null,
"chemsys": "Ac-Cu-F-O",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -618,6 +624,7 @@
"_id": {
"$oid": "5cfb441f053b174410700dc9"
},
"assemblies": null,
"chemsys": "Ag",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -683,6 +690,7 @@
"_id": {
"$oid": "5cfb441f053b174410700ddd"
},
"assemblies": null,
"chemsys": "Ag-Br-Cl-Te",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -871,6 +879,7 @@
"_id": {
"$oid": "5cfb441f053b174410700e04"
},
"assemblies": null,
"chemsys": "Ag-C-Cl-N-O-S",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -1045,6 +1054,7 @@
"_id": {
"$oid": "5cfb441f053b174410700e11"
},
"assemblies": null,
"chemsys": "Ag-C-Cl-H-N",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -1292,6 +1302,7 @@
"_id": {
"$oid": "5cfb441f053b174410700e15"
},
"assemblies": null,
"chemsys": "Ag-C-H-N-O",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -1545,6 +1556,7 @@
"_id": {
"$oid": "5cfb441f053b174410700e1a"
},
"assemblies": null,
"chemsys": "Ag-C-Cl-H-N-O-S",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -1808,6 +1820,7 @@
"_id": {
"$oid": "5cfb441f053b174410700ebf"
},
"assemblies": null,
"chemsys": "Ag-Br-Cl-Hg-I-S",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -2030,6 +2043,7 @@
"_id": {
"$oid": "5cfb441f053b174410700f28"
},
"assemblies": null,
"chemsys": "Ag-B-C-Cl-H-N-O-P",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -2610,6 +2624,7 @@
"_id": {
"$oid": "5cfb441f053b174410700f79"
},
"assemblies": null,
"chemsys": "Ag-C-Cl-H-N-O-S",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -2909,6 +2924,7 @@
"_id": {
"$oid": "5cfb441f053b174410701bdc"
},
"assemblies": null,
"chemsys": "Ba-Ce-Fe-H-Na-O-Si-Ti",
"cartesian_site_positions": [
[
Expand Down Expand Up @@ -3309,6 +3325,7 @@
"_id": {
"$oid": "5cfb441f053b174410701bec"
},
"assemblies": null,
"chemsys": "Ba-F-H-Mn-Na-O-Re-Si-Ti",
"cartesian_site_positions": [
[
Expand Down
18 changes: 18 additions & 0 deletions optimade/validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,15 @@ def _construct_single_property_filters(
"returning different results each time."
)

# check that the filter returned no entries that had a null or missing value for the filtered property
if any(
entry["attributes"].get(prop, entry.get(prop, None)) is None
for entry in reversed_response.get("data", [])
):
raise ResponseError(
f"Filter {reversed_query!r} on field {prop!r} returned entries that had null or missing values for the field."
CasperWA marked this conversation as resolved.
Show resolved Hide resolved
)

excluded = operator in exclusive_operators
# if we have all results on this page, check that the blessed ID is in the response
if not response["meta"]["more_data_available"]:
Expand All @@ -848,6 +857,15 @@ def _construct_single_property_filters(
f"(with field '{prop} = {test_value}') potentially indicating a problem with filtering on this field."
)

# check that the filter returned no entries that had a null or missing value for the filtered property
if any(
entry["attributes"].get(prop, entry.get(prop, None)) is None
for entry in response.get("data", [])
):
raise ResponseError(
f"Filter {query!r} on field {prop!r} returned entries that had null or missing values for the field."
)

if prop in CONF.unique_properties:
if num_data_returned["="] is not None and num_data_returned["="] == 0:
raise ResponseError(
Expand Down
25 changes: 21 additions & 4 deletions tests/filtertransformers/test_mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ def test_simple_comparisons(self):
assert self.transform("a>3") == {"a": {"$gt": 3}}
assert self.transform("a>=3") == {"a": {"$gte": 3}}
assert self.transform("a=3") == {"a": {"$eq": 3}}
assert self.transform("a!=3") == {"a": {"$ne": 3}}
assert self.transform("a!=3") == {
"$and": [{"a": {"$ne": 3}}, {"a": {"$ne": None}}]
}

def test_id(self):
assert self.transform('id="example/1"') == {"id": {"$eq": "example/1"}}
Expand Down Expand Up @@ -135,7 +137,12 @@ def test_operators(self):
) == {
"$and": [
{"chemical_formula_hill": {"$eq": "H2O"}},
{"chemical_formula_anonymous": {"$ne": "AB"}},
{
"$and": [
{"chemical_formula_anonymous": {"$ne": "AB"}},
{"chemical_formula_anonymous": {"$ne": None}},
]
},
]
}
assert self.transform(
Expand All @@ -149,7 +156,12 @@ def test_operators(self):
{"nelements": {"$gte": 10}},
{
"$nor": [
{"_exmpl_x": {"$ne": "Some string"}},
{
"$and": [
{"_exmpl_x": {"$ne": "Some string"}},
{"_exmpl_x": {"$ne": None}},
]
},
{"_exmpl_a": {"$not": {"$eq": 7}}},
]
},
Expand Down Expand Up @@ -443,7 +455,12 @@ class MyMapper(mapper("StructureMapper")):

assert transformer.transform(
parser.parse('immutable_id != "5cfb441f053b174410700d02"')
) == {"_id": {"$ne": ObjectId("5cfb441f053b174410700d02")}}
) == {
"$and": [
{"_id": {"$ne": ObjectId("5cfb441f053b174410700d02")}},
{"_id": {"$ne": None}},
]
}

for op in ("CONTAINS", "STARTS WITH", "ENDS WITH", "HAS"):
with pytest.raises(
Expand Down
29 changes: 29 additions & 0 deletions tests/server/routers/test_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,32 @@ def test_structures_endpoint_data(self):
assert len(self.json_response["data"]) == 2
assert "included" in self.json_response
assert len(self.json_response["included"]) == 2


class TestStructuresWithNullFieldsDoNotMatchNegatedFilters(RegularEndpointTests):
"""Tests that structures with e.g., `'assemblies':null` do not get
returned for negated queries like `filter=assemblies != 1`, as mandated
by the specification.

"""

request_str = "/structures?filter=assemblies != 1"
CasperWA marked this conversation as resolved.
Show resolved Hide resolved
response_cls = StructureResponseMany

def test_structures_endpoint_data(self):
"""Check that no structures are returned."""
assert len(self.json_response["data"]) == 0


class TestStructuresWithNullFieldsMatchUnknownFilter(RegularEndpointTests):
"""Tests that structures with e.g., `'assemblies':null` do get
returned for queries testing for "UNKNOWN" fields.

"""

request_str = "/structures?filter=assemblies IS UNKNOWN"
response_cls = StructureResponseMany

def test_structures_endpoint_data(self):
"""Check that all structures are returned."""
assert len(self.json_response["data"]) == 17