Skip to content

Commit

Permalink
Added some contorted post-processing to include null check in IS KNOW…
Browse files Browse the repository at this point in the history
…N queries
  • Loading branch information
ml-evs committed May 26, 2020
1 parent af5a580 commit 7caecde
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 11 deletions.
45 changes: 44 additions & 1 deletion optimade/filtertransformers/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def postprocess(self, query):

query = self._apply_relationship_filtering(query)
query = self._apply_length_operators(query)
query = self._apply_unknown_or_null_filter(query)

return query

Expand Down Expand Up @@ -134,7 +135,10 @@ def value_op_rhs(self, operator, value):

def known_op_rhs(self, arg):
    """Translate the right-hand side of an ``IS KNOWN`` / ``IS UNKNOWN`` filter.

    Parameters:
        arg: the parsed children of the rule; ``arg[1]`` is compared against
            the string ``"KNOWN"``.

    Returns:
        A one-item dict ``{"#known": bool}``, ``True`` for ``KNOWN`` and
        ``False`` otherwise.

    """
    # known_op_rhs: IS ( KNOWN | UNKNOWN )
    # The OPTIMADE spec also requires a comparison with null, which cannot be
    # expressed on the right-hand side alone. We therefore emit the special
    # key "#known" here; it gets replaced with the expanded dict during
    # post-processing. Returning a bare {"$exists": ...} at this point would
    # bypass that expansion entirely.
    return {"#known": arg[1] == "KNOWN"}

def fuzzy_string_op_rhs(self, arg):
# fuzzy_string_op_rhs: CONTAINS value | STARTS [ WITH ] value | ENDS [ WITH ] value
Expand Down Expand Up @@ -389,6 +393,45 @@ def replace_with_relationship(subdict, prop, expr):
filter_, check_for_entry_type, replace_with_relationship
)

def _apply_unknown_or_null_filter(self, filter_: dict) -> dict:
    """Expand the ``#known`` placeholder into concrete MongoDB operators.

    A KNOWN check becomes "the field exists AND is not null"; an UNKNOWN
    check becomes "the field is missing OR is null". A ``$not`` wrapper
    around the placeholder inverts which of the two is emitted.

    Parameters:
        filter_: the query dict, possibly containing ``{"#known": bool}``
            or ``{"$not": {"#known": bool}}`` as the value of a field.

    Returns:
        Whatever ``recursive_postprocessing`` returns for this filter once
        every placeholder has been rewritten.

    """

    def check_for_known_filter(prop, expr):
        """Find cases where the query dict looks like
        `{"field": {"#known": T/F}}` or
        `{"field": {"$not": {"#known": T/F}}}`, where "#known" is the
        magic word for KNOWN/UNKNOWN filters in this transformer.
        """
        if not isinstance(expr, dict):
            return False
        if "#known" in expr:
            return True
        # Only look inside "$not" when it wraps a dict; other operand types
        # cannot carry the placeholder.
        negated_expr = expr.get("$not")
        return isinstance(negated_expr, dict) and "#known" in negated_expr

    def replace_known_filter_with_or(subdict, prop, expr):
        """Swap the placeholder stored under `prop` for the real clauses."""
        # check_for_known_filter guarantees the placeholder is either
        # directly in `expr` or one level down inside "$not".
        negated = "#known" not in expr
        if negated:
            expr = expr["$not"]

        # XOR: a negation flips KNOWN into UNKNOWN and vice versa.
        known = expr["#known"] != negated

        if known:
            # Both conditions must hold, so a field stored as null is not
            # reported as known. Extending an existing "$and" is safe
            # because conjunction is associative.
            clauses = [{prop: {"$exists": True}}, {prop: {"$ne": None}}]
            subdict.setdefault("$and", []).extend(clauses)
        else:
            clauses = [{prop: {"$exists": False}}, {prop: {"$eq": None}}]
            if "$or" not in subdict:
                subdict["$or"] = clauses
            else:
                # An unrelated "$or" is already present: merging into it
                # would turn "(A or B) and unknown" into
                # "A or B or unknown", so AND the new disjunction instead.
                subdict.setdefault("$and", []).append({"$or": clauses})

        subdict.pop(prop)

        return subdict

    return recursive_postprocessing(
        filter_, check_for_known_filter, replace_known_filter_with_or
    )


def recursive_postprocessing(filter_, condition, replacement):
""" Recursively descend into the query, checking each dictionary
Expand Down
81 changes: 71 additions & 10 deletions tests/filtertransformers/test_mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,24 +598,85 @@ def test_list_properties(self):
},
)

def test_properties(self):
def test_known_properties(self):
# Filtering on Properties with unknown value
# TODO: {'$not': {'$exists': False}} can be simplified to {'$exists': True}
# The { $not: { $gt: 1.99 } } is different from the $lte operator. { $lte: 1.99 } returns only the documents
# where price field exists and its value is less than or equal to 1.99.
# Remember that the $not operator only affects other operators and cannot check fields and documents
# independently. So, use the $not operator for logical disjunctions and the $ne operator to test
# the contents of fields directly.
# source: https://docs.mongodb.com/manual/reference/operator/query/not/
self.assertEqual(
self.transform("chemical_formula_anonymous IS UNKNOWN"),
{
"$or": [
{"chemical_formula_anonymous": {"$exists": False}},
{"chemical_formula_anonymous": {"$type": {"$eq": "null"}}},
]
},
)
self.assertEqual(
self.transform("chemical_formula_anonymous IS KNOWN"),
{
"$or": [
{"chemical_formula_anonymous": {"$exists": True}},
{"chemical_formula_anonymous": {"$type": {"$ne": "null"}}},
]
},
)
self.assertEqual(
self.transform("NOT chemical_formula_anonymous IS UNKNOWN"),
{
"$or": [
{"chemical_formula_anonymous": {"$exists": True}},
{"chemical_formula_anonymous": {"$type": {"$ne": "null"}}},
]
},
)
self.assertEqual(
self.transform("NOT chemical_formula_anonymous IS KNOWN"),
{
"$or": [
{"chemical_formula_anonymous": {"$exists": False}},
{"chemical_formula_anonymous": {"$type": {"$eq": "null"}}},
]
},
)

self.assertEqual(
self.transform(
"chemical_formula_hill IS KNOWN AND NOT chemical_formula_anonymous IS UNKNOWN"
),
{
"$and": [
{"chemical_formula_hill": {"$exists": True}},
{"chemical_formula_anonymous": {"$not": {"$exists": False}}},
{
"$or": [
{"chemical_formula_hill": {"$exists": True}},
{"chemical_formula_hill": {"$type": {"$ne": "null"}}},
]
},
{
"$or": [
{"chemical_formula_anonymous": {"$exists": True}},
{"chemical_formula_anonymous": {"$type": {"$ne": "null"}}},
]
},
]
},
)

self.assertEqual(
self.transform(
"chemical_formula_hill IS KNOWN AND chemical_formula_anonymous IS UNKNOWN"
),
{
"$and": [
{
"$or": [
{"chemical_formula_hill": {"$exists": True}},
{"chemical_formula_hill": {"$type": {"$ne": "null"}}},
]
},
{
"$or": [
{"chemical_formula_anonymous": {"$exists": False}},
{"chemical_formula_anonymous": {"$type": {"$eq": "null"}}},
]
},
]
},
)
Expand Down

0 comments on commit 7caecde

Please sign in to comment.