Skip to content

Commit

Permalink
Add __in filter (#30)
Browse files Browse the repository at this point in the history
* Add __in and null lookups for filter
Princeton-CDH/mep-django#198

* Add test and docs for __in filter
Supports functionality in Princeton-CDH/mep-django#198

* Add facet_field method to SolrQuerySet, for field-specific facet options

* Revise filter lookup based on PR #30

* Add to tests; add ex tag keyword arguments

* Rename ex -> exclude
  • Loading branch information
meg-codes committed Apr 22, 2019
1 parent 36a1814 commit de92e99
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 34 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
CHANGELOG
=========

0.3
---

* Add support for `__in` queries with `filter` method of SolrQuerySet
* Add support for searching for missing/unset values in `filter` method of
SolrQuerySet.

0.2
---
Expand Down
131 changes: 110 additions & 21 deletions parasolr/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
if one is not passed in.
"""
from collections import OrderedDict
from typing import Dict, List
import re
from typing import Any, Dict, List

from parasolr.solr import SolrClient
from parasolr.solr.client import QueryResponse


class SolrQuerySet:
"""A Solr queryset object that allows for object oriented
searching and filtering of Solr results. Allows search results
Expand All @@ -37,15 +39,19 @@ class SolrQuerySet:
filter_qs = []
field_list = []
highlight_field = None
facet_field = []
facet_field_list = []
facet_opts = {}
highlight_opts = {}
raw_params = {}


#: by default, combine search queries with AND
default_search_operator = 'AND'

#: any value constant
ANY_VALUE = '[* TO *]'
#: lookup separator
LOOKUP_SEP = '__'

def __init__(self, solr: SolrClient):
# requires solr client so that this version can be django-agnostic
self.solr = solr
Expand Down Expand Up @@ -108,13 +114,15 @@ def query_opts(self) -> Dict[str, str]:
for key, val in self.highlight_opts.items():
query_opts['hl.%s' % key] = val

if self.facet_field:
if self.facet_field_list:
query_opts.update({
'facet': True,
'facet.field': self.facet_field
'facet.field': self.facet_field_list
})
for key, val in self.facet_opts.items():
query_opts['facet.%s' % key] = val
# use key as is if it starts with "f."
# (field-specific facet options); otherwise prepend "facet."
query_opts[key if key.startswith('f.') else 'facet.%s' % key] = val

# include any raw query parameters
query_opts.update(self.raw_params)
Expand Down Expand Up @@ -159,15 +167,63 @@ def get_facets(self) -> Dict[str, int]:
return self.solr.query(**query_opts).facet_counts['facet_fields']

@staticmethod
def _lookup_to_filter(key: str, value: Any, tag: str = '') -> str:
    """Convert a keyword/value pair into a Solr filter query string.

    The key may carry one Django-style lookup, separated from the
    field name by ``__``. Supported lookups are ``in`` and ``exists``.
    Field names should *NOT* include double-underscores by convention.

    :param key: field name, optionally suffixed with ``__in`` or
        ``__exists``
    :param value: value to filter on. For ``in``, an iterable of
        values (a bare string is treated as a single value); ``''``
        or ``None`` among the values also matches documents where the
        field is unset. For ``exists``, a truthy/falsy flag.
    :param tag: optional tag; when set, the filter is prefixed with
        ``{!tag=...}`` so it can be excluded when faceting
    :returns: a properly formatted Solr query string
    :raises ValueError: if the key contains more than one lookup
        separator or names an unsupported lookup
    """
    # format tag for inclusion if set
    if tag:
        tag = '{!tag=%s}' % tag

    parts = key.split(SolrQuerySet.LOOKUP_SEP)
    if len(parts) == 1:
        # simple lookup, return key:value pair
        return '%s%s:%s' % (tag, key, value)
    if len(parts) != 2:
        raise ValueError(
            'Filter "%s" must contain at most one "%s" lookup separator'
            % (key, SolrQuerySet.LOOKUP_SEP))
    field, lookup = parts

    # __in=[a, b, c] filter
    if lookup == 'in':
        # a bare string is one value, not an iterable of characters
        values = [value] if isinstance(value, str) else list(value)
        # '' or None in the list flags that unset fields should match too
        include_unset = any(val in ('', None) for val in values)
        # cast to str so non-string values (e.g. integers) can be joined
        terms = [str(val) for val in values if val not in ('', None)]
        if not terms:
            # nothing but empty values (or an empty list):
            # behave as if __exists=False
            return '%s-%s:%s' % (tag, field, SolrQuerySet.ANY_VALUE)

        in_filter = '%s:(%s)' % (field, ' OR '.join(terms))
        if not include_unset:
            return '%s%s' % (tag, in_filter)
        # Query syntax does not support the simpler positive case
        # ("unset OR one of these values"). Instead, negate a query
        # that matches any value of the field while excluding the
        # requested values; the double negation leaves documents
        # where the field is unset or holds a requested value.
        # The final output is something like:
        # -(item_type:[* TO *] OR -item_type:(book OR periodical))
        return '%s-(%s:%s OR -%s)' % (tag, field, SolrQuerySet.ANY_VALUE,
                                      in_filter)

    # __exists=True/False filter
    if lookup == 'exists':
        # match any value for the field; negate when value is falsy
        negate = '' if value else '-'
        return '%s%s%s:%s' % (tag, negate, field, SolrQuerySet.ANY_VALUE)

    # fail loudly instead of returning None into the filter list
    raise ValueError('Unsupported filter lookup "%s" in "%s"'
                     % (lookup, key))

def filter(self, *args, tag: str='', **kwargs) -> 'SolrQuerySet':
"""
Return a new SolrQuerySet with Solr filter queries added.
Multiple filters can be combined either in a single
Expand All @@ -177,20 +233,30 @@ def filter(self, *args, **kwargs) -> 'SolrQuerySet':
queryset.filter(item_type='person').filter(birth_year=1900)
queryset.filter(item_type='person', birth_year=1900)
To provide a filter that should be used in modified, provide
You can also filter using pre-defined lookups (in, exists)::
queryset.filter(item_type__in=['person', 'book'])
queryset.filter(item_type__exists=False)
Tags may be specified for the filter to be used with facet.field
exclusions::
queryset.filter(item_type='person', tag='person')
To provide a filter that should be used unmodified, provide
the exact string of your filter query::
queryset.filter('birth_year:[1800 TO *]')
"""
qs_copy = self._clone()

# any args are treated as filter queries without modification
qs_copy.filter_qs.extend(args)

for key, value in kwargs.items():
qs_copy.filter_qs.append(self._lookup_to_filter(key, value))

qs_copy.filter_qs.append(self._lookup_to_filter(key, value, tag=tag))
return qs_copy

def facet(self, *args: str, **kwargs) -> 'SolrQuerySet':
Expand All @@ -212,12 +278,36 @@ def facet(self, *args: str, **kwargs) -> 'SolrQuerySet':
qs_copy = self._clone()

# cast args tuple to list for consistency with other iterable fields
qs_copy.facet_field = list(args)
qs_copy.facet_field_list = list(args)
# add other kwargs to be prefixed in query_opts
qs_copy.facet_opts.update(kwargs)

return qs_copy

def facet_field(self, field: str, exclude: str = '',
                **kwargs) -> 'SolrQuerySet':
    """
    Request faceting for a single field. Returns a new SolrQuerySet
    with the field added to the list of facet fields. Any keyword
    arguments will be set as field-specific facet configurations.

    :param field: name of the field to facet on
    :param exclude: optional tag of a related filter query to exclude
        when generating counts for this facet
    :param kwargs: facet options for this field only; each is stored
        under the key ``f.<field>.facet.<option>``
    :returns: a modified copy; the current queryset is not changed
    """
    qs_copy = self._clone()

    # prefix the field with the exclude tag if one was specified
    if exclude:
        facet_entry = '{!ex=%s}%s' % (exclude, field)
    else:
        facet_entry = field
    qs_copy.facet_field_list.append(facet_entry)

    # store keyword args under field-specific option names
    qs_copy.facet_opts.update({
        'f.%s.facet.%s' % (field, opt): value
        for opt, value in kwargs.items()
    })

    return qs_copy

def search(self, *args, **kwargs) -> 'SolrQuerySet':
"""
Return a new SolrQuerySet with search queries added. All
Expand Down Expand Up @@ -329,10 +419,9 @@ def _clone(self) -> 'SolrQuerySet':
qs_copy.field_list = list(self.field_list)
qs_copy.highlight_opts = dict(self.highlight_opts)
qs_copy.raw_params = dict(self.raw_params)
qs_copy.facet_field = list(self.facet_field)
qs_copy.facet_field_list = list(self.facet_field_list)
qs_copy.facet_opts = dict(self.facet_opts)


return qs_copy

def set_limits(self, start, stop):
Expand Down
98 changes: 86 additions & 12 deletions parasolr/tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_query_opts(self):
sqs.field_list = ['title', 'author', 'date:pubyear_i']
sqs.highlight_field = 'content'
sqs.highlight_opts = {'snippets': 3, 'method': 'unified'}
sqs.facet_field = ['item_type', 'member_type']
sqs.facet_field_list = ['item_type', 'member_type']
sqs.facet_opts = {'sort': 'count'}
query_opts = sqs.query_opts()

Expand All @@ -55,9 +55,17 @@ def test_query_opts(self):
assert query_opts['hl.method'] == 'unified'
# make sure faceting opts are preserved
assert query_opts['facet'] is True
assert query_opts['facet.field'] == sqs.facet_field
assert query_opts['facet.field'] == sqs.facet_field_list
assert query_opts['facet.sort'] == 'count'

# field-specific facet unchanged
field_facet_opt = 'f.sort.facet.missing'
sqs.facet_opts = {field_facet_opt: True}
query_opts = sqs.query_opts()
# included unchanged, without extra facet prefix
assert field_facet_opt in query_opts
assert query_opts[field_facet_opt]

def test_query(self):
mocksolr = Mock(spec=SolrClient)
mocksolr.query.return_value.docs = []
Expand Down Expand Up @@ -173,45 +181,74 @@ def test_filter(self):
# original queryset is unchanged
assert not sqs.filter_qs

# keyword arg options converted into filters
filtered_qs = sqs.filter(item_type='work', date=1500)
# keyword arg options converted into filters, except tag, which
# is prepended as a special case.
filtered_qs = sqs.filter(item_type='work', date=1500, tag='workDate')
# returned queryset has the filters
assert 'item_type:work' in filtered_qs.filter_qs
assert 'date:1500' in filtered_qs.filter_qs
assert '{!tag=workDate}item_type:work' in filtered_qs.filter_qs
assert '{!tag=workDate}date:1500' in filtered_qs.filter_qs
# original queryset is unchanged
assert not sqs.filter_qs

# chaining adds to the filters
# chaining adds to the filters, tag is optional and not appended
# if not supplied
filtered_qs = sqs.filter(item_type='work').filter(date=1500) \
.filter('name:he*')
assert 'item_type:work' in filtered_qs.filter_qs
assert 'date:1500' in filtered_qs.filter_qs
assert 'name:he*' in filtered_qs.filter_qs


def test_facet(self):
    """facet() sets the facet field list and options on a copy."""
    mocksolr = Mock(spec=SolrClient)
    sqs = SolrQuerySet(mocksolr)
    # facet a search
    facet_list = ['person_type', 'item_type']
    faceted_qs = sqs.facet(*facet_list)
    # faceting should be set on
    assert faceted_qs.facet_field_list == facet_list
    # facet opts and field list for original queryset should be unchanged
    assert not sqs.facet_opts
    assert not sqs.facet_field_list

    # a call to another method should leave facet options as is
    faceted_qs = faceted_qs.filter(foo='bar')
    assert faceted_qs.facet_field_list == facet_list
    # subsequent calls to facet should simply reset list
    facet_list = ['foobars']
    faceted_qs = faceted_qs.facet(*facet_list)
    assert faceted_qs.facet_field_list == facet_list
    # kwargs should simply be set in facet opts
    faceted_qs = faceted_qs.facet(*facet_list, sort='count')
    assert faceted_qs.facet_field_list == facet_list
    assert faceted_qs.facet_opts['sort'] == 'count'

def test_facet_field(self):
    """facet_field() adds one facet field at a time on a copy."""
    solr_queryset = SolrQuerySet(Mock(spec=SolrClient))

    # a lone facet field without options lands in the field list
    single = solr_queryset.facet_field('sort')
    assert 'sort' in single.facet_field_list
    # the queryset it was derived from stays untouched
    assert 'sort' not in solr_queryset.facet_field_list

    # chained calls accumulate fields
    double = single.facet_field('title')
    for name in ('sort', 'title'):
        assert name in double.facet_field_list

    # keyword arguments become field-specific facet options
    with_opts = solr_queryset.facet_field('sort', missing=True)
    assert 'sort' in with_opts.facet_field_list
    assert 'f.sort.facet.missing' in with_opts.facet_opts

    # an exclude tag is rendered as an {!ex=...} prefix on the field
    excluded = solr_queryset.facet_field('sort', exclude='sort')
    assert '{!ex=sort}sort' in excluded.facet_field_list


def test_search(self):
mocksolr = Mock(spec=SolrClient)
sqs = SolrQuerySet(mocksolr)
Expand Down Expand Up @@ -419,8 +456,45 @@ class CustomSolrQuerySet(SolrQuerySet):
assert 'name:hem*' in search_sqs.search_qs

def test__lookup_to_filter(self):
    """Check each supported lookup, first untagged and then tagged."""
    # (key, value, expected filter string when no tag is given)
    cases = [
        # simple key-value
        ('item_type', 'work', 'item_type:work'),
        # exists
        ('item_type__exists', True, 'item_type:[* TO *]'),
        # does not exist
        ('item_type__exists', False, '-item_type:[* TO *]'),
        # simple __in query
        ('item_type__in', ['a', 'b'], 'item_type:(a OR b)'),
        # complex __in query with a negation
        ('item_type__in', ['a', 'b', ''],
         '-(item_type:[* TO *] OR -item_type:(a OR b))'),
        # __in query with just a negation
        ('item_type__in', [''], '-item_type:[* TO *]'),
    ]

    for key, value, expected in cases:
        assert SolrQuerySet._lookup_to_filter(key, value) == expected

    # the same cases with a tag: the tag is prepended to the filter
    for key, value, expected in cases:
        tagged = SolrQuerySet._lookup_to_filter(key, value, tag='type')
        assert tagged == '{!tag=type}' + expected


def test_iter(self):
mocksolr = Mock(spec=SolrClient)
Expand Down
4 changes: 3 additions & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@
DJANGO_SETTINGS_MODULE=testsettings
addopts=-p no:parasolr
# look for tests in standard django test location
python_files = "**/tests.py" "**/test_*.py"
python_files = "**/tests.py" "**/test_*.py"
# set testpaths for collection speed up
testpaths = parasolr

0 comments on commit de92e99

Please sign in to comment.