Skip to content

Commit

Permalink
Add extended filtering (#87)
Browse files Browse the repository at this point in the history
* Fixing CDATA wrapping in the request XML templates.

* New style data filtering.

* Renaming range_filters in the downloaded data.

The applied filters are no longer exclusively range filters.

* Fixing typos.

* Extend docstrings and fix style

* Version bump to 0.11.0

Co-authored-by: Martin Paces <martin.paces@eox.at>
  • Loading branch information
smithara and pacesm committed Sep 2, 2022
1 parent ef94235 commit c2c8d7b
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/viresclient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@
from ._config import ClientConfig, set_token
from ._data_handling import ReturnedData, ReturnedDataFile

__version__ = "0.10.4"
__version__ = "0.11.0"
190 changes: 175 additions & 15 deletions src/viresclient/_client_swarm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1627,39 +1627,199 @@ def set_products(
self._request_inputs.custom_shc = custom_shc
return self

def set_range_filter(self, parameter, minimum=None, maximum=None, negate=False):
    """Set a range filter to apply.

    Filters data for minimum ≤ parameter ≤ maximum,
    or parameter < minimum OR parameter > maximum if negated.

    Note:
        - Apply multiple filters with successive calls to
          ``.set_range_filter()``
        - See :py:meth:`SwarmRequest.add_filter` for arbitrary filters.

    Args:
        parameter (str): name of the variable to filter on
        minimum (float or integer): lower bound; ``None`` leaves the range
            open on this side
        maximum (float or integer): upper bound; ``None`` leaves the range
            open on this side
        negate (bool): select the values outside of the range instead

    Returns:
        The request object itself, so that calls can be chained.

    Raises:
        TypeError: if ``parameter`` is not a str

    Examples:
        ``request.set_range_filter("Latitude", 0, 90)``
            to set "Latitude >= 0 AND Latitude <= 90"
        ``request.set_range_filter("Latitude", 0, 90, negate=True)``
            to set "(Latitude < 0 OR Latitude > 90)"
    """
    if not isinstance(parameter, str):
        raise TypeError("parameter must be a str")

    if negate:
        joiner, lower_op, upper_op = " OR ", "<", ">"
    else:
        joiner, lower_op, upper_op = " AND ", ">=", "<="

    conditions = []
    if minimum is not None:
        conditions.append(f"{parameter} {lower_op} {minimum}")
    if maximum is not None:
        conditions.append(f"{parameter} {upper_op} {maximum}")

    if not conditions:
        # No bound given: there is nothing to filter on, but the request is
        # still returned so that chained calls keep working.
        return self

    filter_ = joiner.join(conditions)
    if len(conditions) > 1:
        # Compound predicates must be parenthesised in the filter grammar.
        filter_ = f"({filter_})"

    self.add_filter(filter_)
    return self

def set_choice_filter(self, parameter, *values, negate=False):
    """Set a choice filter to apply.

    Filters data for *parameter in values*,
    or *parameter not in values* if negated.

    Note:
        See :py:meth:`SwarmRequest.add_filter` for arbitrary filters.

    Args:
        parameter (str): name of the variable to filter on
        values (float or integer or string): accepted (or, if negated,
            rejected) values
        negate (bool): exclude the listed values instead of selecting them

    Returns:
        The request object itself, so that calls can be chained.

    Raises:
        TypeError: if ``parameter`` is not a str

    Examples:
        ``request.set_choice_filter("Flags_F", 0, 1)``
            to set "(Flags_F == 0 OR Flags_F == 1)"
        ``request.set_choice_filter("Flags_F", 0, 1, negate=True)``
            to set "(Flags_F != 0 AND Flags_F != 1)"
    """
    if not isinstance(parameter, str):
        raise TypeError("parameter must be a str")

    def _literal(value):
        # NumPy >= 2 scalars repr as e.g. "np.int64(1)", which is not a valid
        # filter literal; convert such scalars to native Python values first.
        to_native = getattr(value, "item", None)
        if callable(to_native):
            value = to_native()
        return repr(value)

    if not values:
        # Nothing to filter on, but the request is still returned so that
        # chained calls keep working.
        return self

    compop, joiner = ("!=", " AND ") if negate else ("==", " OR ")
    filter_ = joiner.join(
        f"{parameter} {compop} {_literal(value)}" for value in values
    )
    if len(values) > 1:
        # Compound predicates must be parenthesised in the filter grammar.
        filter_ = f"({filter_})"

    self.add_filter(filter_)
    return self

def set_bitmask_filter(self, parameter, selection=0, mask=-1, negate=False):
    """Set a bitmask filter to apply.

    Filters data for *parameter & mask == selection & mask*,
    or *parameter & mask != selection & mask* if negated.

    Note:
        See :py:meth:`SwarmRequest.add_filter` for arbitrary filters.

    Args:
        parameter (str): name of the variable to filter on
        selection (integer): bit pattern to compare against
        mask (integer): bits taken into account; the default ``-1`` means
            all bits, in which case a plain ==/!= comparison is produced
        negate (bool): invert the comparison

    Returns:
        The request object itself, so that calls can be chained.

    Raises:
        TypeError: if ``parameter`` is not a str

    Examples:
        ``request.set_bitmask_filter("Flags_F", 0, 1)``
            to set "Flags_F & 1 == 0" (i.e. the least significant bit is 0)
    """
    if not isinstance(parameter, str):
        raise TypeError("parameter must be a str")

    if mask == 0 and not negate:
        # "parameter & 0 == 0" is always true, so no filter is added.
        return self

    # Note: with negate=True a zero mask is passed through on purpose; it
    # yields "parameter & 0 != 0", which selects nothing.
    compop = "!=" if negate else "=="
    if mask == -1:
        filter_ = f"{parameter} {compop} {selection}"
    else:
        filter_ = f"{parameter} & {mask} {compop} {selection & mask}"

    self.add_filter(filter_)
    return self

def add_filter(self, filter_):
    """Add an arbitrary data filter.

    The filter is appended to the list of already applied filters and all
    of them are combined with AND in the request.

    Filter grammar:

    .. code-block:: text

        filter: predicate
        predicate:
            variable == literal |
            variable != literal |
            variable < number |
            variable > number |
            variable <= number |
            variable >= number |
            variable & unsigned-integer == unsigned-integer |
            variable & unsigned-integer != unsigned-integer |
            (predicate AND predicate [AND predicate ...]) |
            (predicate OR predicate [OR predicate ...]) |
            NOT predicate
        literal: boolean | integer | float | string
        number: integer | float
        variable: identifier | identifier[index]
        index: integer[, integer ...]

    Both single- and double quoted strings are allowed.
    NaN values are matched by the ==/!= operators, i.e., the predicates
    are internally converted to a proper "IS NaN" or "IS NOT NaN"
    comparison.

    Args:
        filter_ (str): filter expression following the grammar above

    Returns:
        The request object itself, so that calls can be chained.

    Raises:
        TypeError: if ``filter_`` is not a str

    Examples:
        "Flags & 128 == 0"
            Match records with Flags bit 7 set to 0.
        "Elevation >= 15"
            Match records with Elevation greater than or equal to 15.
        "(Label == "D" OR Label == "N" OR Label == "X")"
            Match records with Label set to D, N or X.
        "(Type != 1 AND Type != 34)" or "NOT (Type == 1 OR Type == 34)"
            Exclude records with Type set to 1 or 34.
        "(Vector[2] <= -0.1 OR Vector[2] >= 0.5)"
            Match records with Vector[2] values outside of the (-0.1, 0.5)
            range.
    """
    if not isinstance(filter_, str):
        raise TypeError("filter_ must be a str")
    self._filterlist.append(filter_)
    # Update the request inputs: all filters must hold simultaneously.
    self._request_inputs.filters = " AND ".join(self._filterlist)
    return self

def clear_filters(self):
    """Drop every filter set so far and return the request for chaining."""
    # Reset the value sent with the request as well as the local bookkeeping.
    self._request_inputs.filters = None
    self._filterlist = list()
    return self

clear_range_filter = clear_filters # alias for backward compatibility

def applied_filters(self):
    """Print the currently applied filters, one per line."""
    if self._filterlist:
        print(*self._filterlist, sep="\n")

def get_times_for_orbits(
self, start_orbit, end_orbit, mission="Swarm", spacecraft=None
):
Expand Down
16 changes: 8 additions & 8 deletions src/viresclient/_data_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def __init__(self, file, filetype="cdf"):
self.magnetic_models = self._ensure_list(
globalatts.get("MAGNETIC_MODELS", [])
)
self.range_filters = self._ensure_list(globalatts.get("DATA_FILTERS", []))
self.data_filters = self._ensure_list(globalatts.get("DATA_FILTERS", []))
self.variables = self._cdf.cdf_info()["zVariables"]
self._varatts = {var: self._cdf.varattsget(var) for var in self.variables}
self._varinfo = {var: self._cdf.varinq(var) for var in self.variables}
Expand Down Expand Up @@ -629,10 +629,10 @@ def magnetic_models(self):
return magnetic_models

@property
def data_filters(self):
    """Get the list of filters applied to the data in this file.

    The file is opened with ``FileReader`` only while the value is read.
    """
    with FileReader(self._file) as f:
        data_filters = f.data_filters
    return data_filters


class ReturnedData:
Expand All @@ -645,7 +645,7 @@ class ReturnedData:
...
data = request.get_between(..., ...)
data.sources
data.range_filters
data.data_filters
data.magnetic_models
data.as_xarray()
data.as_xarray_dict()
Expand Down Expand Up @@ -698,11 +698,11 @@ def magnetic_models(self):
return sorted(models)

@property
def data_filters(self):
    """Get list of filters applied.

    Returns:
        list: sorted, de-duplicated union of the ``data_filters`` of all
        contained items
    """
    return sorted(set().union(*(item.data_filters for item in self._contents)))

@property
Expand Down Expand Up @@ -803,7 +803,7 @@ def as_xarray(self, reshape=False):
ds.attrs["Sources"] = self.sources
if self.filetype == "cdf":
ds.attrs["MagneticModels"] = self.magnetic_models
ds.attrs["RangeFilters"] = self.range_filters
ds.attrs["AppliedFilters"] = self.data_filters
return ds

def as_xarray_dict(self):
Expand Down
8 changes: 8 additions & 0 deletions src/viresclient/_wps/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,19 @@

from jinja2 import Environment, FileSystemLoader


def wrap_as_cdata(content):
    """Return *content* enclosed in an XML CDATA section.

    Any "]]>" inside the content would end the section early, so each
    occurrence is split across two adjacent CDATA sections.
    """
    pieces = content.split("]]>")
    escaped = "]]]]><![CDATA[>".join(pieces)
    return "<![CDATA[" + escaped + "]]>"


# Templates are loaded from the "templates" directory next to this module.
_DIRNAME = dirname(__file__)
_TEMPLATESDIR = join(_DIRNAME, "templates")
JINJA2_ENVIRONMENT = Environment(loader=FileSystemLoader(_TEMPLATESDIR))

# Custom filters available to the request templates.
JINJA2_ENVIRONMENT.filters.update(
    {
        # object with .isoformat() -> ISO string with a "Z" suffix
        "d2s": lambda value: value.isoformat("T") + "Z",
        # iterable -> comma-separated string
        "l2s": lambda items: ", ".join(str(item) for item in items),
        # object -> JSON string
        "o2j": json.dumps,
        # string -> XML CDATA section
        "cdata": wrap_as_cdata,
    }
)
8 changes: 4 additions & 4 deletions src/viresclient/_wps/templates/vires_fetch_filtered_data.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@
<wps:Input>
<ows:Identifier>collection_ids</ows:Identifier>
<wps:Data>
<wps:ComplexData mimeType="application/json"><![CDATA[{{ collection_ids|o2j }}]]></wps:ComplexData>
<wps:ComplexData mimeType="application/json">{{ collection_ids|o2j|cdata }}</wps:ComplexData>
</wps:Data>
</wps:Input>
{% if model_expression -%}
<wps:Input>
<ows:Identifier>model_ids</ows:Identifier>
<wps:Data>
<wps:LiteralData>{{ model_expression }}</wps:LiteralData>
<wps:LiteralData>{{ model_expression|cdata }}</wps:LiteralData>
</wps:Data>
</wps:Input>
{% endif -%}
{% if custom_shc -%}
<wps:Input>
<ows:Identifier>shc</ows:Identifier>
<wps:Data>
<wps:ComplexData>{{ custom_shc }}</wps:ComplexData>
<wps:ComplexData>{{ custom_shc|cdata }}</wps:ComplexData>
</wps:Data>
</wps:Input>
{% endif -%}
Expand All @@ -46,7 +46,7 @@
<wps:Input>
<ows:Identifier>filters</ows:Identifier>
<wps:Data>
<wps:LiteralData>{{ filters }}</wps:LiteralData>
<wps:LiteralData>{{ filters|cdata }}</wps:LiteralData>
</wps:Data>
</wps:Input>
{% endif -%}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@
<wps:Input>
<ows:Identifier>collection_ids</ows:Identifier>
<wps:Data>
<wps:ComplexData mimeType="application/json"><![CDATA[{{ collection_ids|o2j }}]]></wps:ComplexData>
<wps:ComplexData mimeType="application/json">{{ collection_ids|o2j|cdata }}</wps:ComplexData>
</wps:Data>
</wps:Input>
{% if model_expression -%}
<wps:Input>
<ows:Identifier>model_ids</ows:Identifier>
<wps:Data>
<wps:LiteralData>{{ model_expression }}</wps:LiteralData>
<wps:LiteralData>{{ model_expression|cdata }}</wps:LiteralData>
</wps:Data>
</wps:Input>
{% endif -%}
{% if custom_shc -%}
<wps:Input>
<ows:Identifier>shc</ows:Identifier>
<wps:Data>
<wps:ComplexData>{{ custom_shc }}</wps:ComplexData>
<wps:ComplexData>{{ custom_shc|cdata }}</wps:ComplexData>
</wps:Data>
</wps:Input>
{% endif -%}
Expand All @@ -46,7 +46,7 @@
<wps:Input>
<ows:Identifier>filters</ows:Identifier>
<wps:Data>
<wps:LiteralData>{{ filters }}</wps:LiteralData>
<wps:LiteralData>{{ filters|cdata }}</wps:LiteralData>
</wps:Data>
</wps:Input>
{% endif -%}
Expand Down
2 changes: 1 addition & 1 deletion src/viresclient/_wps/templates/vires_get_model_info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<wps:Input>
<ows:Identifier>model_ids</ows:Identifier>
<wps:Data>
<wps:LiteralData>{{ model_expression }}</wps:LiteralData>
<wps:LiteralData>{{ model_expression|cdata }}</wps:LiteralData>
</wps:Data>
</wps:Input>
{% endif -%}
Expand Down

0 comments on commit c2c8d7b

Please sign in to comment.