Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add null check to mongo filtertransformer for KNOWN/UNKNOWN filters #279

Merged
merged 3 commits into from
May 29, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 42 additions & 1 deletion optimade/filtertransformers/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def postprocess(self, query):

query = self._apply_relationship_filtering(query)
query = self._apply_length_operators(query)
query = self._apply_unknown_or_null_filter(query)

return query

Expand Down Expand Up @@ -134,7 +135,10 @@ def value_op_rhs(self, operator, value):

def known_op_rhs(self, arg):
    """Translate the right-hand side of an ``IS KNOWN`` / ``IS UNKNOWN`` filter.

    Grammar rule: ``known_op_rhs: IS ( KNOWN | UNKNOWN )``.

    The OPTIMADE specification requires a comparison with null in addition
    to an existence check, which cannot be expressed as a single operator
    on the field at this point. A placeholder key ``"#known"`` is therefore
    returned here and expanded into the full query during post-processing.

    Parameters:
        arg: The parsed tokens of the rule; ``arg[1]`` is compared against
            the string ``"KNOWN"``.

    Returns:
        ``{"#known": True}`` for ``IS KNOWN``, ``{"#known": False}`` otherwise.

    """
    # Only the placeholder is returned: a plain {"$exists": ...} here would
    # bypass the null check that post-processing adds.
    return {"#known": arg[1] == "KNOWN"}

def fuzzy_string_op_rhs(self, arg):
# fuzzy_string_op_rhs: CONTAINS value | STARTS [ WITH ] value | ENDS [ WITH ] value
Expand Down Expand Up @@ -389,6 +393,43 @@ def replace_with_relationship(subdict, prop, expr):
filter_, check_for_entry_type, replace_with_relationship
)

def _apply_unknown_or_null_filter(self, filter_: dict) -> dict:
    """Expand the ``"#known"`` placeholder into concrete MongoDB operators.

    This method loops through the query and replaces each KNOWN check with
    "the field exists AND is not null", and each UNKNOWN check with
    "the field does not exist OR is null".

    Parameters:
        filter_: The query dictionary produced by the transformer, possibly
            containing ``"#known"`` placeholders.

    Returns:
        The query with every placeholder replaced.

    """

    def check_for_known_filter(prop, expr):
        """Find cases where the query dict looks like
        `{"field": {"#known": T/F}}` or
        `{"field": {"$not": {"#known": T/F}}}`, where `"#known"` is the
        magic word for KNOWN/UNKNOWN filters in this transformer.

        """
        if not isinstance(expr, dict):
            return False
        if "#known" in expr:
            return True
        # Guard against a non-dict `$not` value before the membership test.
        negated_expr = expr.get("$not")
        return isinstance(negated_expr, dict) and "#known" in negated_expr

    def replace_known_filter_with_or(subdict, prop, expr):
        """Replace the placeholder on `prop` with an existence check plus
        a null comparison, combined with `$and` (KNOWN) or `$or` (UNKNOWN).

        """
        negated = set(expr.keys()) == {"$not"}
        if negated:
            expr = expr["$not"]

        # True when the overall meaning is "field is KNOWN".
        exists = expr["#known"] ^ negated

        if exists:
            # KNOWN needs BOTH conditions: with `$or`, a field that is
            # present but null satisfies `$exists: True` and would match.
            top_level_key = "$and"
            comparison_operator = "$ne"
        else:
            # UNKNOWN is satisfied by EITHER a missing field or a null one.
            top_level_key = "$or"
            comparison_operator = "$eq"

        if top_level_key not in subdict:
            subdict[top_level_key] = []

        subdict[top_level_key].append({prop: {"$exists": exists}})
        subdict[top_level_key].append({prop: {comparison_operator: None}})

        # The placeholder entry has now been replaced by the expanded form.
        subdict.pop(prop)

        return subdict

    return recursive_postprocessing(
        filter_, check_for_known_filter, replace_known_filter_with_or
    )


def recursive_postprocessing(filter_, condition, replacement):
""" Recursively descend into the query, checking each dictionary
Expand Down
81 changes: 71 additions & 10 deletions tests/filtertransformers/test_mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,24 +598,85 @@ def test_list_properties(self):
},
)

def test_properties(self):
def test_known_properties(self):
# Filtering on Properties with unknown value
# TODO: {'$not': {'$exists': False}} can be simplified to {'$exists': True}
# The { $not: { $gt: 1.99 } } is different from the $lte operator. { $lte: 1.99 } returns only the documents
# where price field exists and its value is less than or equal to 1.99.
# Remember that the $not operator only affects other operators and cannot check fields and documents
# independently. So, use the $not operator for logical disjunctions and the $ne operator to test
# the contents of fields directly.
# source: https://docs.mongodb.com/manual/reference/operator/query/not/
self.assertEqual(
self.transform("chemical_formula_anonymous IS UNKNOWN"),
{
"$or": [
{"chemical_formula_anonymous": {"$exists": False}},
{"chemical_formula_anonymous": {"$eq": None}},
]
},
)
self.assertEqual(
self.transform("chemical_formula_anonymous IS KNOWN"),
{
"$or": [
ml-evs marked this conversation as resolved.
Show resolved Hide resolved
{"chemical_formula_anonymous": {"$exists": True}},
{"chemical_formula_anonymous": {"$ne": None}},
]
},
)
self.assertEqual(
self.transform("NOT chemical_formula_anonymous IS UNKNOWN"),
{
"$or": [
{"chemical_formula_anonymous": {"$exists": True}},
{"chemical_formula_anonymous": {"$ne": None}},
]
},
)
self.assertEqual(
self.transform("NOT chemical_formula_anonymous IS KNOWN"),
{
"$or": [
{"chemical_formula_anonymous": {"$exists": False}},
{"chemical_formula_anonymous": {"$eq": None}},
]
},
)

self.assertEqual(
self.transform(
"chemical_formula_hill IS KNOWN AND NOT chemical_formula_anonymous IS UNKNOWN"
),
{
"$and": [
{"chemical_formula_hill": {"$exists": True}},
{"chemical_formula_anonymous": {"$not": {"$exists": False}}},
{
"$or": [
{"chemical_formula_hill": {"$exists": True}},
{"chemical_formula_hill": {"$ne": None}},
]
},
{
"$or": [
{"chemical_formula_anonymous": {"$exists": True}},
{"chemical_formula_anonymous": {"$ne": None}},
]
},
]
},
)

self.assertEqual(
self.transform(
"chemical_formula_hill IS KNOWN AND chemical_formula_anonymous IS UNKNOWN"
),
{
"$and": [
{
"$or": [
{"chemical_formula_hill": {"$exists": True}},
{"chemical_formula_hill": {"$ne": None}},
]
},
{
"$or": [
{"chemical_formula_anonymous": {"$exists": False}},
{"chemical_formula_anonymous": {"$eq": None}},
]
},
]
},
)
Expand Down