Skip to content

Commit

Permalink
Merge pull request #67 from Princeton-CDH/feature/highlight-multiple-…
Browse files Browse the repository at this point in the history
…fields

Add support for highlighting multiple fields
  • Loading branch information
rlskoeser committed Nov 16, 2021
2 parents a18fbcf + c53b603 commit f577609
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 23 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ CHANGELOG
0.8
---
* Pytest fixture ``mock_solr_queryset`` now takes optional argument for extra methods to include in fluent interface
* ``SolrQuerySet`` now supports highlighting on multiple fields via ``highlight`` method, with per-field highlighting options.
* ``AliasedSolrQuerySet`` now correctly aliases fieldnames in highlighting results.

0.7
---
Expand Down
17 changes: 13 additions & 4 deletions parasolr/query/aliased_queryset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict
from typing import Dict, List

from parasolr.query.queryset import SolrQuerySet

Expand Down Expand Up @@ -130,6 +130,15 @@ def get_stats(self) -> Dict[str, Dict]:
}
return stats

# NOTE: may want to do the same for highlighting also eventually,
# but no immediate need and it's structured differently so
# not as obvious how to handle
def get_highlighting(self) -> Dict[str, Dict[str, List]]:
    """Return the highlighting portion of the Solr response, with
    Solr field names converted back to their aliases.

    Highlighting results are keyed on document id; for each document
    there is a dictionary where the key is a field name and the value
    is the list of snippets. Any field name found in
    ``self.reverse_aliases`` is replaced by its mapped value; field
    names without an entry are left unchanged.

    A new dictionary is built instead of rewriting the parent's return
    value in place, so the object returned by the parent (which may be
    shared with the cached response) is not modified and repeated calls
    cannot re-alias names that were already converted.

    :returns: dictionary of highlighting results keyed on document id;
        an empty or ``None`` result from the parent is returned as-is.
    """
    highlighting = super().get_highlighting()
    # nothing to alias: pass an empty/None result through unchanged
    if not highlighting:
        return highlighting

    reverse_aliases = self.reverse_aliases
    return {
        doc_id: {
            reverse_aliases.get(field, field): snippets
            for field, snippets in highlights.items()
        }
        for doc_id, highlights in highlighting.items()
    }
23 changes: 14 additions & 9 deletions parasolr/query/queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class SolrQuerySet:
search_qs = []
filter_qs = []
field_list = []
highlight_field = None
highlight_fields = []
facet_field_list = []
stats_field_list = []
range_facet_fields = []
Expand Down Expand Up @@ -89,13 +89,14 @@ def get_results(self, **kwargs) -> List[dict]:
def _set_highlighting_opts(self, query_opts: Dict) -> None:
"""Configure highlighting attributes on query_opts. Modifies
dictionary directly."""
if self.highlight_field:
if self.highlight_fields:
query_opts.update({
'hl': True,
'hl.fl': self.highlight_field
'hl.fl': ','.join(self.highlight_fields)
})
for key, val in self.highlight_opts.items():
query_opts['hl.%s' % key] = val
# highlighting options should be added as-is
# (prefixes added in highlight methods)
query_opts.update(self.highlight_opts)

def _set_faceting_opts(self, query_opts: Dict) -> None:
"""Configure faceting attributes directly on query_opts. Modifies
Expand Down Expand Up @@ -512,8 +513,12 @@ def highlight(self, field: str, **kwargs) -> 'SolrQuerySet':
queryset.highlight('content', snippets=3, method='unified')
"""
qs_copy = self._clone()
qs_copy.highlight_field = field
qs_copy.highlight_opts = kwargs
qs_copy.highlight_fields.append(field)
# make highlight options field-specific to allow for multiple fields
qs_copy.highlight_opts.update({
'f.%s.hl.%s' % (field, opt): value
for opt, value in kwargs.items()})

return qs_copy

def raw_query_parameters(self, **kwargs) -> 'SolrQuerySet':
Expand All @@ -524,7 +529,7 @@ def raw_query_parameters(self, **kwargs) -> 'SolrQuerySet':
qs_copy.raw_params.update(kwargs)
return qs_copy

def get_highlighting(self):
def get_highlighting(self) -> Dict[str, Dict[str, List]]:
"""Return the highlighting portion of the Solr response."""
if not self._result_cache:
self.get_results()
Expand Down Expand Up @@ -552,7 +557,7 @@ def _clone(self) -> 'SolrQuerySet':
# set attributes that can be copied directly
qs_copy.start = self.start
qs_copy.stop = self.stop
qs_copy.highlight_field = self.highlight_field
qs_copy.highlight_fields = list(self.highlight_fields)

# set copies of list and dict attributes
qs_copy.search_qs = list(self.search_qs)
Expand Down
26 changes: 26 additions & 0 deletions parasolr/query/tests/test_aliased_queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,32 @@ def test_get_stats(self, mock_get_stats):
mock_get_stats.return_value = None
assert self.mysqs.get_stats() is None

@patch('parasolr.query.queryset.SolrQuerySet.get_highlighting')
def test_get_highlighting(self, mock_get_highlighting):
    # In setup for tests, name_t is aliased to name;
    # description_t is used here as a field without an alias
    doc_highlights = {
        "name_t": ["snippet 1", "snippet 2"],
        "description_t": ["another snippet"]
    }
    sample_highlights = {"item.1": doc_highlights}

    # Give the mock its own deep copy, so the sample data stays
    # untouched and can be compared against the result afterwards
    mock_get_highlighting.return_value = copy.deepcopy(sample_highlights)
    item_highlights = self.mysqs.get_highlighting()["item.1"]

    # the Solr field name is replaced by its alias
    assert "name_t" not in item_highlights
    assert "name" in item_highlights
    # the snippets for the aliased field are unchanged
    assert item_highlights["name"] == sample_highlights["item.1"]["name_t"]
    # a field without an alias keeps its original name
    assert 'description_t' in item_highlights

    # ensure that if get_highlighting returns None on error,
    # we don't get an error when trying to realias fields
    mock_get_highlighting.return_value = None
    assert self.mysqs.get_highlighting() is None



21 changes: 11 additions & 10 deletions parasolr/query/tests/test_queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def test_query_opts(self):
sqs.filter_qs = ['item_type_s:work']
sqs.search_qs = ['title:reading', 'author:johnson']
sqs.field_list = ['title', 'author', 'date:pubyear_i']
sqs.highlight_field = 'content'
sqs.highlight_opts = {'snippets': 3, 'method': 'unified'}
sqs.highlight_fields = ['content']
sqs.highlight_opts = {'f.content.hl.snippets': 3, 'f.content.hl.method': 'unified'}
sqs.facet_field_list = ['item_type_s', 'member_type']
sqs.facet_opts = {'sort': 'count'}
sqs.stats_field_list = ['item_type_s', 'account_start_i']
Expand All @@ -54,9 +54,9 @@ def test_query_opts(self):
# highlighting should be turned on
assert query_opts['hl']
assert query_opts['hl.fl'] == 'content'
# highlighting options added with hl.prefix
assert query_opts['hl.snippets'] == 3
assert query_opts['hl.method'] == 'unified'
# highlighting options added as-is
assert query_opts['f.content.hl.snippets'] == 3
assert query_opts['f.content.hl.method'] == 'unified'
# make sure faceting opts are preserved
assert query_opts['facet'] is True
assert query_opts['facet.field'] == sqs.facet_field_list
Expand Down Expand Up @@ -468,18 +468,19 @@ def test_highlight(self):
sqs = SolrQuerySet(mocksolr)
# field only, defaults
highlight_qs = sqs.highlight('content')
assert highlight_qs.highlight_field == 'content'
assert highlight_qs.highlight_fields == ['content']
assert highlight_qs.highlight_opts == {}
# original unchanged
assert sqs.highlight_field is None
assert sqs.highlight_fields == []

# field and opts
highlight_qs = sqs.highlight('text', snippets=3, method='unified')
assert highlight_qs.highlight_field == 'text'
assert highlight_qs.highlight_fields == ['text']
print(highlight_qs.highlight_opts)
assert highlight_qs.highlight_opts == \
{'snippets': 3, 'method': 'unified'}
{'f.text.hl.snippets': 3, 'f.text.hl.method': 'unified'}
# original unchanged
assert sqs.highlight_field is None
assert sqs.highlight_fields == []
assert sqs.highlight_opts == {}

def test_raw_query_parameters(self):
Expand Down

0 comments on commit f577609

Please sign in to comment.