Skip to content

Commit

Permalink
Basic facet support (#28)
Browse files Browse the repository at this point in the history
* Add basic queryset faceting
Princeton-CDH/mep-django#199

* Add tests for new queryset methods Princeton-CDH/mep-django#199

* Bump solr ver to 6.6.6

* Fix errata/documentation from PR review

* Refactor solr tests to submodules #21

* Fix query tests to use QueryResponse

* Add check for facet_opts in query_opts

* Fix error with conflicting kwargs; test ParasolrDict

* Fix breaking typo in query

* Add test case for original queryset unchanged after adding facets

* Properly copy facet_opts on clone

* Revise facet to use unprefixed kwargs; set facet in query_opts
  • Loading branch information
meg-codes authored and rlskoeser committed Apr 11, 2019
1 parent b102eab commit 4601c40
Show file tree
Hide file tree
Showing 11 changed files with 913 additions and 700 deletions.
8 changes: 4 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ python:
- '3.5'
- '3.6'
env:
- SOLR_VERSION=6.6.5
- DJANGO=1.11 SOLR_VERSION=6.6.5
- DJANGO=2.0 SOLR_VERSION=6.6.5
- DJANGO=2.1 SOLR_VERSION=6.6.5
- SOLR_VERSION=6.6.6
- DJANGO=1.11 SOLR_VERSION=6.6.6
- DJANGO=2.0 SOLR_VERSION=6.6.6
- DJANGO=2.1 SOLR_VERSION=6.6.6
before_install:
- pip install --upgrade pip
- pip install --upgrade pytest
Expand Down
79 changes: 71 additions & 8 deletions parasolr/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
which will automatically initialize a new :class:`parasolr.django.SolrClient`
if one is not passed in.
"""

from collections import OrderedDict
from typing import Dict, List

from parasolr.solr import SolrClient

from parasolr.solr.client import QueryResponse

class SolrQuerySet:
"""A Solr queryset object that allows for object oriented
Expand All @@ -37,9 +37,12 @@ class SolrQuerySet:
filter_qs = []
field_list = []
highlight_field = None
facet_field = []
facet_opts = {}
highlight_opts = {}
raw_params = {}


#: by default, combine search queries with AND
default_search_operator = 'AND'

Expand Down Expand Up @@ -70,8 +73,8 @@ def get_results(self, **kwargs) -> List[dict]:

# NOTE: django templates choke on AttrDict because it is
# callable; using dictionary response instead
self._result_cache = self.solr.query(wrap=False, **query_opts)
return self._result_cache['response']['docs']
self._result_cache = self.solr.query(**query_opts)
return [doc.as_dict() for doc in self._result_cache.docs]

def query_opts(self) -> Dict[str, str]:
"""Construct query options based on current queryset configuration.
Expand Down Expand Up @@ -105,6 +108,14 @@ def query_opts(self) -> Dict[str, str]:
for key, val in self.highlight_opts.items():
query_opts['hl.%s' % key] = val

if self.facet_field:
query_opts.update({
'facet': True,
'facet.field': self.facet_field
})
for key, val in self.facet_opts.items():
query_opts['facet.%s' % key] = val

# include any raw query parameters
query_opts.update(self.raw_params)

Expand All @@ -115,13 +126,37 @@ def count(self) -> int:

# if result cache is already populated, use it
if self._result_cache is not None:
return self._result_cache['response']['numFound']
return self._result_cache.numFound

# otherwise, query with current options but request zero rows
# and do not populate the result cache
query_opts = self.query_opts()
# setting these by dictionary assignment, because conflicting
# kwargs results in a Python exception
query_opts['rows'] = 0
return self.solr.query(**query_opts, wrap=False)['response']['numFound']
query_opts['facet'] = False
query_opts['hl'] = False
return self.solr.query(**query_opts).numFound

def get_facets(self) -> Dict[str, int]:
    """Return a dictionary of facets and their values and
    counts as key/value pairs.

    When the result cache is populated, the facets are read from the
    cached response. Otherwise a separate query is run with the current
    options, requesting zero rows and no highlighting, and the result
    cache is left unpopulated.

    Returns:
        The ``facet_fields`` entry of the response's ``facet_counts``.
    """
    if self._result_cache is not None:
        # The result cache already holds a processed query response
        # (count() and __getitem__ read .numFound and .docs from it),
        # so read the facets from it directly rather than wrapping it
        # in a second QueryResponse.
        # NOTE: using dictionary syntax preserves OrderedDict
        return self._result_cache.facet_counts['facet_fields']

    # since we just want a dictionary of facet fields, don't populate
    # the result cache, no rows needed
    query_opts = self.query_opts()
    # setting these by dictionary assignment, because conflicting
    # kwargs results in a Python exception
    query_opts['rows'] = 0
    query_opts['hl'] = False
    return self.solr.query(**query_opts).facet_counts['facet_fields']

@staticmethod
def _lookup_to_filter(key, value) -> str:
Expand Down Expand Up @@ -158,6 +193,31 @@ def filter(self, *args, **kwargs) -> 'SolrQuerySet':

return qs_copy

def facet(self, *args: str, **kwargs) -> 'SolrQuerySet':
    """
    Return a new SolrQuerySet with faceting requested on the given
    fields. Ranged faceting is not supported.

    Positional arguments replace the facet field list, so only the
    most recent call in a chain determines ``facet.field``::

        qs = queryset.facet('person_type', 'age')
        qs = qs.facet('item_type')

    leaves `item_type` as the only facet field. Keyword arguments are
    merged into the facet options, which are given a ``facet.`` prefix
    when the query options are built.
    """
    faceted = self._clone()

    # keyword options accumulate on top of whatever the clone carried over
    faceted.facet_opts.update(kwargs)
    # store the fields as a list, like the other iterable attributes
    faceted.facet_field = [*args]

    return faceted

def search(self, *args, **kwargs) -> 'SolrQuerySet':
"""
Return a new SolrQuerySet with search queries added. All
Expand Down Expand Up @@ -262,13 +322,16 @@ def _clone(self) -> 'SolrQuerySet':
qs_copy.stop = self.stop
qs_copy.highlight_field = self.highlight_field

# set copies of list attributes
# set copies of list and dict attributes
qs_copy.search_qs = list(self.search_qs)
qs_copy.filter_qs = list(self.filter_qs)
qs_copy.sort_options = list(self.sort_options)
qs_copy.field_list = list(self.field_list)
qs_copy.highlight_opts = dict(self.highlight_opts)
qs_copy.raw_params = dict(self.raw_params)
qs_copy.facet_field = list(self.facet_field)
qs_copy.facet_opts = dict(self.facet_opts)


return qs_copy

Expand Down Expand Up @@ -303,7 +366,7 @@ def __getitem__(self, k):
# if the result cache is already populated,
# return the requested index or slice
if self._result_cache is not None:
return self._result_cache['response']['docs'][k]
return self._result_cache.docs[k]

qs_copy = self._clone()

Expand Down
33 changes: 25 additions & 8 deletions parasolr/solr/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import OrderedDict
import logging
from typing import Any, Optional
from typing import Any, Dict, Optional

from attrdict import AttrDict
import requests
Expand All @@ -19,15 +19,32 @@
# despite not being hugely Pythonic, for consistency with Solr's responses
# and API documentation.

class QueryReponse:
class ParasolrDict(AttrDict):
    """An :class:`attrdict.AttrDict` subclass that knows how to turn
    itself back into a plain dictionary."""

    def as_dict(self):
        """Build a regular ``dict`` from this object's items. Values that
        are themselves :class:`ParasolrDict` instances are converted
        recursively; all other values are carried over unchanged."""
        return {
            key: value.as_dict() if isinstance(value, ParasolrDict) else value
            for key, value in self.items()
        }

class QueryResponse:
"""Thin wrapper to give access to Solr select responses.
Args:
response: A Solr query response
"""
def __init__(self, response: AttrDict) -> None:
self.numFound = response.response.numFound
self.start = response.response.start
def __init__(self, response: Dict) -> None:
# cast to ParasolrDict for any dict-like object
response = ParasolrDict(response)
self.numFound = int(response.response.numFound)
self.start = int(response.response.start)
self.docs = response.response.docs
self.params = response.responseHeader.params
self.facet_counts = {}
Expand All @@ -38,7 +55,7 @@ def __init__(self, response: AttrDict) -> None:
self._process_facet_counts(response.facet_counts)
# NOTE: To access facet_counts.facet_fields or facet_counts.facet_ranges
# as OrderedDicts, you must use dict notation (or AttrDict *will*
# convert.
# convert).

def _process_facet_counts(self, facet_counts: AttrDict) \
-> AttrDict:
Expand Down Expand Up @@ -119,7 +136,7 @@ def __init__(self, solr_url: str, collection: str,
self.core_admin_handler,
self.session)

def query(self, wrap: bool = True, **kwargs: Any) -> Optional[QueryReponse]:
def query(self, wrap: bool = True, **kwargs: Any) -> Optional[QueryResponse]:
"""Perform a query with the specified kwargs.
Args:
Expand All @@ -140,4 +157,4 @@ def query(self, wrap: bool = True, **kwargs: Any) -> Optional[QueryReponse]:
)
if response:
# queries return the search response for now
return QueryReponse(response) if wrap else response
return QueryResponse(response) if wrap else response

0 comments on commit 4601c40

Please sign in to comment.