Skip to content

Commit

Permalink
Add aliased solr queryset (#31)
Browse files Browse the repository at this point in the history
* Refactor parasolr.query into a module

* Basic aliased solr queryset with tests

* Update get_facets to return all facets, not just facet fields

* Return facet results with aliased field names

* Add Django version of AliasedQuerySet

* Add doc string for django version of aliased solr query set
  • Loading branch information
rlskoeser committed Apr 24, 2019
1 parent de92e99 commit 5647d93
Show file tree
Hide file tree
Showing 7 changed files with 329 additions and 9 deletions.
5 changes: 5 additions & 0 deletions parasolr/django.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,8 @@ def __init__(self, solr: Optional[SolrClient] = None):
# use passed-in solr client if there is one;
# otherwise, initialize a django solr client
super().__init__(solr or SolrClient())


class AliasedSolrQuerySet(SolrQuerySet, query.AliasedSolrQuerySet):
    """Combination of :class:`SolrQuerySet` and
    :class:`~parasolr.query.aliased_queryset.AliasedSolrQuerySet`.

    Provides the field aliasing of the aliased queryset together with
    the Django queryset behavior of initializing a Django
    :class:`SolrClient` when no solr client is passed in.
    """
2 changes: 2 additions & 0 deletions parasolr/query/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from parasolr.query.queryset import SolrQuerySet
from parasolr.query.aliased_queryset import AliasedSolrQuerySet
109 changes: 109 additions & 0 deletions parasolr/query/aliased_queryset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from typing import Any, Dict, List

from parasolr.query.queryset import SolrQuerySet


class AliasedSolrQuerySet(SolrQuerySet):
    '''Extension of :class:`~parasolr.query.queryset.SolrQuerySet`
    with support for aliasing Solr fields to more readable versions
    for use in code. To use, extend this class and define a
    dictionary of :attr:`field_aliases` with the same syntax you would
    when calling :meth:`only`. Those field aliases will be set
    as the default initial value for :attr:`field_list`, and aliases
    can be used in all extended methods.
    '''

    #: map of application-specific, readable field names
    #: to actual solr fields (i.e. if using dynamic field types)
    field_aliases = {}

    def __init__(self, *args, **kwargs):
        '''Initialize the parent queryset, then populate the default
        :attr:`field_list` and the reverse alias lookup from
        :attr:`field_aliases`.'''
        super().__init__(*args, **kwargs)
        # set default field list based on field_aliases,
        # in the form ``alias:solr_field``
        self.field_list = ['%s:%s' % (key, value)
                           for key, value in self.field_aliases.items()]

        # generate reverse lookup (solr field -> alias) for updating
        # facets & highlights; if two aliases share a solr field,
        # the one defined last is used
        self.reverse_aliases = {
            val: key for key, val in self.field_aliases.items()
        }

    def _unalias_args(self, *args):
        '''Convert alias names to solr fields for a list of args.
        Values that are not a known alias are returned unchanged.'''
        return [self.field_aliases.get(arg, arg) for arg in args]

    def _unalias_kwargs(self, **kwargs):
        '''Convert alias names to solr fields for keys in kwargs.
        Unknown keys and all values are returned unchanged.'''
        return {self.field_aliases.get(key, key): val
                for key, val in kwargs.items()}

    def _unalias_kwargs_with_lookups(self, **kwargs):
        '''Convert alias names to solr fields for keys in kwargs,
        with support for lookups (``field<LOOKUP_SEP>lookup``)
        as used in filters. Only the field portion of the key is
        unaliased; the lookup suffix is preserved as-is.'''
        new_kwargs = {}
        for key, val in kwargs.items():
            # split on the first separator only: the first part is
            # always the field name; separator and lookup are empty
            # strings when the key has no lookup
            field, sep, lookup = key.partition(self.LOOKUP_SEP)
            # get solr field for the alias if there is one
            field = self.field_aliases.get(field, field)
            # add any lookup back using the same separator it was
            # split on, rather than a hard-coded value
            new_kwargs['%s%s%s' % (field, sep, lookup)] = val

        return new_kwargs

    def filter(self, *args, tag: str = '', **kwargs) -> 'AliasedSolrQuerySet':
        '''Extend :meth:`parasolr.query.queryset.SolrQuerySet.filter`
        to support using aliased field names for keyword argument keys,
        including keys with lookups. Positional arguments are passed
        through unmodified.'''
        kwargs = self._unalias_kwargs_with_lookups(**kwargs)
        return super().filter(*args, tag=tag, **kwargs)

    def facet(self, *args, **kwargs) -> 'AliasedSolrQuerySet':
        '''Extend :meth:`parasolr.query.queryset.SolrQuerySet.facet`
        to support using aliased field names in args. Keyword
        arguments are passed through unmodified.'''
        args = self._unalias_args(*args)
        return super().facet(*args, **kwargs)

    def facet_field(self, field: str, exclude: str = '',
                    **kwargs) -> 'AliasedSolrQuerySet':
        '''Extend :meth:`parasolr.query.queryset.SolrQuerySet.facet_field`
        to support using aliased field names for field parameter.'''
        field = self.field_aliases.get(field, field)
        return super().facet_field(field, exclude=exclude, **kwargs)

    def order_by(self, *args) -> 'AliasedSolrQuerySet':
        '''Extend :meth:`parasolr.query.queryset.SolrQuerySet.order_by`
        to support using aliased field names in sort arguments.'''
        # NOTE(review): args are matched against aliases verbatim, so
        # a sort argument carrying any prefix or suffix would not be
        # unaliased -- confirm against the parent order_by syntax
        args = self._unalias_args(*args)
        return super().order_by(*args)

    def only(self, *args, **kwargs) -> 'AliasedSolrQuerySet':
        '''Extend :meth:`parasolr.query.queryset.SolrQuerySet.only`
        to support using aliased field names for args (but not kwargs).'''
        args = self._unalias_args(*args)
        return super().only(*args, **kwargs)

    def highlight(self, field: str, **kwargs) -> 'AliasedSolrQuerySet':
        '''Extend :meth:`parasolr.query.queryset.SolrQuerySet.highlight`
        to support using an aliased field name for the field parameter.
        Keyword arguments are passed through unmodified.'''
        field = self.field_aliases.get(field, field)
        return super().highlight(field, **kwargs)

    def get_facets(self) -> Dict[str, Dict]:
        '''Extend :meth:`parasolr.query.queryset.SolrQuerySet.get_facets`
        to use aliased field names for facet and range facet keys.
        Fields without an alias keep their solr field name; facet
        sections not present in the response are left untouched.'''
        facets = super().get_facets()

        # replace field names in facet field and facet range
        # with aliased field names
        for section in ('facet_fields', 'facet_ranges'):
            section_facets = facets.get(section)
            # a response may not include every section (e.g. no range
            # facets requested); nothing to rename in that case
            if not section_facets:
                continue
            facets[section] = {
                self.reverse_aliases.get(field, field): val
                for field, val in section_facets.items()
            }

        return facets

# NOTE: may want to do the same for highlighting also eventually,
# but no immediate need and it's structured differently so
# not as obvious how to handle
9 changes: 5 additions & 4 deletions parasolr/query.py → parasolr/query/queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,16 +146,17 @@ def count(self) -> int:
query_opts['hl'] = False
return self.solr.query(**query_opts).numFound

def get_facets(self) -> Dict[str, int]:
"""Return a dictionary of facets and their values and
counts as key/value pairs.
def get_facets(self) -> Dict[str, Dict]:
"""Return a dictionary of facet information included in the
Solr response. Includes facet fields, facet ranges, etc. Facet
field results are returned as an ordered dict of value and count.
"""
if self._result_cache is not None:
# wrap to process facets and return as dictionary
# for Django template support
qr = QueryResponse(self._result_cache)
# NOTE: using dictionary syntax preserves OrderedDict
return qr.facet_counts['facet_fields']
return qr.facet_counts
# since we just want a dictionary of facet fields, don't populate
# the result cache, no rows needed

Expand Down
177 changes: 177 additions & 0 deletions parasolr/query/tests/test_aliased_queryset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
from unittest import TestCase
from unittest.mock import Mock, patch

from parasolr.query import AliasedSolrQuerySet



class MyAliasedSolrQuerySet(AliasedSolrQuerySet):
    """AliasedSolrQuerySet subclass with sample aliases, for testing"""

    #: readable application field name -> actual solr field name
    field_aliases = {
        'name': 'name_t',
        'year': 'year_i',
        'has_info': 'has_info_b',
    }


class TestAliasedSolrQuerySet(TestCase):
    """Tests for alias handling in AliasedSolrQuerySet, using the
    sample aliases defined on MyAliasedSolrQuerySet."""

    def setUp(self):
        # a mock object stands in for the solr client
        self.mysqs = MyAliasedSolrQuerySet(solr=Mock())

    def test_init(self):
        """field list and reverse lookup are populated on init"""
        expected_count = len(MyAliasedSolrQuerySet.field_aliases.keys())

        # one "alias:solr_field" entry per configured alias
        field_list = self.mysqs.field_list
        assert field_list
        assert len(field_list) == expected_count
        for alias, solr_field in self.mysqs.field_aliases.items():
            assert '%s:%s' % (alias, solr_field) in field_list

        # reverse lookup maps solr field back to alias
        reverse = self.mysqs.reverse_aliases
        assert reverse
        assert len(reverse.keys()) == expected_count
        assert reverse['name_t'] == 'name'

    def test_unalias_args(self):
        """aliases in a list of args are converted to solr fields"""
        aliases = self.mysqs.field_aliases
        result = self.mysqs._unalias_args('name', 'year', 'foo')
        # known aliases are looked up
        assert aliases['name'] in result
        assert aliases['year'] in result
        # anything else is used unchanged
        assert 'foo' in result

    def test_unalias_kwargs(self):
        """aliased keyword argument keys are converted to solr fields"""
        aliases = self.mysqs.field_aliases
        result = self.mysqs._unalias_kwargs(
            name='Jane', year=1942, foo='bar')
        # keys are converted when there is an alias
        assert aliases['name'] in result
        assert aliases['year'] in result
        assert 'foo' in result
        # values are unchanged
        assert result[aliases['name']] == 'Jane'
        assert result['foo'] == 'bar'

    @patch('parasolr.query.queryset.SolrQuerySet.filter')
    def test_filter(self, mock_parent_filter):
        """filter unaliases keyword keys, with or without lookups"""
        sqs = self.mysqs

        # positional arg only: not modified
        sqs.filter('name:foo')
        mock_parent_filter.assert_called_with('name:foo', tag='')

        # keyword arg is unaliased
        sqs.filter(name='Jane')
        mock_parent_filter.assert_called_with(name_t='Jane', tag='')

        # keyword arg with a lookup is also unaliased
        sqs.filter(name__in=['Jane', 'Judy'])
        mock_parent_filter.assert_called_with(
            name_t__in=['Jane', 'Judy'], tag='')

        # field without an alias is passed through unchanged
        sqs.filter(tuesday='wednesday')
        mock_parent_filter.assert_called_with(tuesday='wednesday', tag='')

        # tag is passed along with args and unaliased kwargs
        sqs.filter('foo:bar', name='Jane', tag='baz')
        mock_parent_filter.assert_called_with(
            'foo:bar', name_t='Jane', tag='baz')

    @patch('parasolr.query.queryset.SolrQuerySet.facet')
    def test_facet(self, mock_parent_facet):
        """facet unaliases positional field names"""
        name_field = self.mysqs.field_aliases['name']

        # arg is unaliased
        self.mysqs.facet('name')
        mock_parent_facet.assert_called_with(name_field)

        # kwargs are passed through unchanged
        self.mysqs.facet('name', missing=True)
        mock_parent_facet.assert_called_with(name_field, missing=True)

    @patch('parasolr.query.queryset.SolrQuerySet.facet_field')
    def test_facet_field(self, mock_parent_facet_field):
        """facet_field unaliases the field name"""
        year_field = self.mysqs.field_aliases['year']

        # field name is unaliased; exclude defaults to empty string
        self.mysqs.facet_field('year')
        mock_parent_facet_field.assert_called_with(year_field, exclude='')

        # exclude and any other kwargs are passed through
        self.mysqs.facet_field('year', exclude=True, missing=True)
        mock_parent_facet_field.assert_called_with(
            year_field, exclude=True, missing=True)

    @patch('parasolr.query.queryset.SolrQuerySet.order_by')
    def test_order_by(self, mock_parent_order_by):
        """order_by unaliases sort arguments"""
        self.mysqs.order_by('year')
        mock_parent_order_by.assert_called_with(
            self.mysqs.field_aliases['year'])

    @patch('parasolr.query.queryset.SolrQuerySet.only')
    def test_only(self, mock_parent_only):
        """only unaliases positional args but not kwargs"""
        aliases = self.mysqs.field_aliases

        # args are unaliased
        self.mysqs.only('name', 'year')
        mock_parent_only.assert_called_with(aliases['name'], aliases['year'])

        # kwargs are passed through unchanged
        self.mysqs.only(end_year_i='end_year')
        mock_parent_only.assert_called_with(end_year_i='end_year')

    @patch('parasolr.query.queryset.SolrQuerySet.highlight')
    def test_highlight(self, mock_parent_highlight):
        """highlight unaliases the field name"""
        # known alias is converted
        self.mysqs.highlight('name')
        mock_parent_highlight.assert_called_with(
            self.mysqs.field_aliases['name'])
        # field without an alias is passed through unchanged
        self.mysqs.highlight('foo_b')
        mock_parent_highlight.assert_called_with('foo_b')

    @patch('parasolr.query.queryset.SolrQuerySet.get_facets')
    def test_get_facets(self, mock_parent_get_facets):
        """facet field and range keys are returned as aliases"""
        solr_facets = {
            "facet_fields": {
                "has_info_b": [
                    "false", 5967,
                    "true", 632,
                ],
                "other": [
                    "false", 6,
                    "true", 4,
                ],
            },
            "facet_ranges": {
                "year_i": {
                    "counts": [
                        "1900", 100,
                        "1920", 5939,
                        "1940", 477,
                        "1960", 6,
                    ],
                    "gap": 20,
                    "start": 1900,
                    "end": 1980,
                },
                "birth": {
                    "counts": [
                        "1900", 100,
                        "1920", 5939,
                        "1940", 477,
                        "1960", 6,
                    ],
                    "gap": 20,
                    "start": 1900,
                    "end": 1980,
                },
            },
        }
        # hand the queryset a copy so the sample stays available
        # for comparison
        mock_parent_get_facets.return_value = solr_facets.copy()

        facets = self.mysqs.get_facets()
        mock_parent_get_facets.assert_called_with()

        facet_fields = facets['facet_fields']
        # field with an alias is keyed on the alias, value unchanged
        assert 'has_info' in facet_fields
        assert facet_fields['has_info'] == \
            solr_facets['facet_fields']['has_info_b']
        # field without an alias keeps its name
        assert 'other' in facet_fields

        facet_ranges = facets['facet_ranges']
        # range facets are keyed on aliases as well
        assert 'year' in facet_ranges
        assert facet_ranges['year'] == solr_facets['facet_ranges']['year_i']
        # range field without an alias keeps its name
        assert 'birth' in facet_ranges

Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_count(self):
# cache should not be populated
assert not sqs._result_cache

@patch('parasolr.query.QueryResponse')
@patch('parasolr.query.queryset.QueryResponse')
def test_get_facets(self, mockQR):
mocksolr = Mock(spec=SolrClient)
# mock cached solr response
Expand All @@ -143,10 +143,10 @@ def test_get_facets(self, mockQR):
assert mockQR.called
# called with the cached response
mockQR.assert_called_with(mock_response)
# casts return to an OrderedDict
assert isinstance(ret, OrderedDict)
# facet fields should be an OrderedDict
assert isinstance(ret['facet_fields'], OrderedDict)
# return the value of facet_counts.facet_fields
assert ret == OrderedDict(a=1)
assert ret == {'facet_fields': OrderedDict(a=1)}

# now test no cached result
mocksolr.query.return_value = Mock()
Expand Down
28 changes: 27 additions & 1 deletion parasolr/tests/test_django.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from django.core.exceptions import ImproperlyConfigured
from django.test import override_settings

from parasolr.django import SolrClient, SolrQuerySet
from parasolr.django import SolrClient, SolrQuerySet, \
AliasedSolrQuerySet

except ImportError:
pass
Expand Down Expand Up @@ -83,3 +84,28 @@ def test_django_solrqueryset(mocksolrclient):
sqs = SolrQuerySet(solr=mymocksolr)
assert sqs.solr == mymocksolr
mocksolrclient.assert_not_called()


@skipif_no_django
@patch('parasolr.django.SolrClient')
def test_django_aliasedsolrqueryset(mocksolrclient):
    """Django aliased queryset combines solr client auto-initialization
    with alias setup on init."""

    class MyAliasedSolrQuerySet(AliasedSolrQuerySet):
        """AliasedSolrQuerySet subclass with sample aliases, for testing"""

        #: readable application field name -> actual solr field name
        field_aliases = {
            'name': 'name_t',
            'year': 'year_i',
            'has_info': 'has_info_b',
        }

    # django queryset behavior: a solr client is initialized
    # automatically when none is specified
    aliased_sqs = MyAliasedSolrQuerySet()
    mocksolrclient.assert_called_with()
    assert aliased_sqs.solr == mocksolrclient.return_value
    mocksolrclient.reset_mock()

    # aliased queryset behavior: field list and reverse alias
    # lookup are populated on init
    assert aliased_sqs.field_list
    assert aliased_sqs.reverse_aliases

0 comments on commit 5647d93

Please sign in to comment.