Skip to content

Commit

Permalink
Merge pull request #45 from Princeton-CDH/feature/ppa-support
Browse files Browse the repository at this point in the history
Feature/ppa support
  • Loading branch information
rlskoeser committed Sep 9, 2020
2 parents c8267cf + 99fa473 commit 1f5af15
Show file tree
Hide file tree
Showing 17 changed files with 404 additions and 90 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@
CHANGELOG
=========

0.6
---

* Solr client now escalates 404 errors instead of logging with no exception
* Schema field declarations now support the `stored` option
* Schema field type declarations now pass through arbitrary options
* New method `total_to_index` on `parasolr.indexing.Indexable` to better
support indexing content that is returned as a generator
* Access to expanded results now available on QueryResponse and SolrQuerySet
* SolrQuerySet no longer wraps return results from `get_stats` and `get_facets` with QueryResponse
* New last-modified view mixin for use with Django views `parasolr.django.views.SolrLastModifiedMixin`
* New pytest fixture `mock_solr_queryset` to generate a Mock SolrQuerySet that simulates the SolrQuerySet fluent interface


0.5.4
-----

Expand Down
4 changes: 2 additions & 2 deletions parasolr/django/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def connect():
for model, options in ModelIndexable.related:
for signal_name, handler in options.items():
model_signal = getattr(models.signals, signal_name)
logger.debug('Registering %s signal handler for %s',
signal_name, model)
# NOTE: arguments must follow the placeholder order in the message:
# signal name, then the handler being connected, then the model
logger.debug('Registering %s signal handler %s for %s',
             signal_name, handler, model)
model_signal.connect(handler, sender=model)

@staticmethod
Expand Down
73 changes: 73 additions & 0 deletions parasolr/django/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import calendar
import logging
from email.utils import formatdate

from django.utils.cache import get_conditional_response
from django.views.generic.base import View

from parasolr.django import SolrQuerySet
from parasolr.solr import SolrClientException
from parasolr.utils import solr_timestamp_to_datetime


logger = logging.getLogger(__name__)


class SolrLastModifiedMixin(View):
    """View mixin to add last modified headers based on Solr.

    By default, searches entire solr collection and returns the most
    recent last modified value (assumes **last_modified** field).
    To filter for items specific to your view, either
    set :attr:`solr_lastmodified_filters` or
    implement :meth:`get_solr_lastmodified_filters`.
    """

    #: solr query filter for getting last modified date
    solr_lastmodified_filters = {}  # by default, find all

    def get_solr_lastmodified_filters(self):
        '''Get filters for last modified Solr query. By default returns
        :attr:`solr_lastmodified_filters`.'''
        return self.solr_lastmodified_filters

    def last_modified(self):
        '''Return last modified :class:`datetime.datetime` from the
        specified Solr query, or None if no value could be retrieved
        (no matching results, no **last_modified** field on the first
        result, or a Solr client error).'''
        filter_qs = self.get_solr_lastmodified_filters()
        sqs = SolrQuerySet().filter(**filter_qs) \
            .order_by('-last_modified').only('last_modified')

        try:
            # skip extra call to Solr to check count and just grab the
            # first item if it exists.
            # Solr stores date in isoformat; convert to datetime
            return solr_timestamp_to_datetime(sqs[0]['last_modified'])
        except (IndexError, KeyError, SolrClientException) as err:
            # if a syntax or other solr error happens, no date to return;
            # report the error, but don't fail since the view may still
            # be able to render normally
            logger.error('Failed to retrieve last modified: %s', err)
            # TODO: if possible, report view / args / url that triggered
            # the error
            return None

    def dispatch(self, request, *args, **kwargs):
        '''Wrap the dispatch method to add a last modified header if
        one is available, then return a conditional response.'''

        # NOTE: this doesn't actually skip view processing,
        # but without it we could return a not modified for a non-200
        # response
        response = super().dispatch(request, *args, **kwargs)

        last_modified = self.last_modified()
        if last_modified:
            # convert to epoch seconds the same way django does so that
            # the values will compare correctly; this also drops
            # microseconds, which are not included in the header
            last_modified = calendar.timegm(last_modified.utctimetuple())
            # build the header from the same epoch value so the header
            # and the comparison always agree; formatdate output does not
            # depend on the process locale, unlike strftime %a / %b
            response['Last-Modified'] = formatdate(last_modified,
                                                   usegmt=True)

        return get_conditional_response(request, last_modified=last_modified,
                                        response=response)
23 changes: 21 additions & 2 deletions parasolr/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def all_subclasses(cls):
class Indexable:
"""Mixin for objects that are indexed in Solr. Subclasses must implement
`index_id` and `index` methods.
When implementing an Indexable subclass where items_to_index
returns something like a generator, which does not expose either a
`count` method or can be counted with `len`, for use with
the Django index manage command you should
implement `total_to_index` and return the number of items
to be indexed.
"""

# NOTE: current implementation is Django-specific, intended for
Expand Down Expand Up @@ -78,6 +85,8 @@ def index_item_type(cls):
across all Indexable items in an application. By default, uses
Django model verbose name. Used in default index id and
in index manage command. """
# TODO: move this implementation into django subclass?
# default could just return an attribute on the class
return cls._meta.verbose_name

@classmethod
Expand All @@ -92,6 +101,17 @@ def items_to_index(cls):
except AttributeError:
raise NotImplementedError

@classmethod
def total_to_index(cls):
    """Return the number of items to be indexed for this class of
    Indexable content.

    The default implementation asks the ``objects`` attribute of the
    class for its ``count()``. When that lookup fails with an
    AttributeError, NotImplementedError is raised instead; subclasses
    without such a manager should override this method.
    """
    try:
        manager = cls.objects
        return manager.count()
    except AttributeError:
        raise NotImplementedError

def index_id(self):
"""Solr identifier. By default, combines :meth:`index item_type`
and :attr:`id` with :attr:ID_SEPARATOR`."""
Expand All @@ -108,8 +128,7 @@ def index_data(self):
}

def index(self):
    """Index the current object in Solr.

    Sends the result of :meth:`index_data` to the Solr update handler
    as a single-item list.
    """
    send_to_solr = self.solr.update.index
    send_to_solr([self.index_data()])

Expand Down
18 changes: 8 additions & 10 deletions parasolr/management/commands/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,15 @@ def handle(self, *args, **kwargs):
# calculate total to index across all indexables for current mode
for name, model in self.indexables.items():
if self.options['index'] in [name, 'all']:
# possibly inefficient to generate the list just
# for a count; should be ok for django queryset implementation,
# hopefully not too bad for other cases
items = model.items_to_index()
if items:
try:
# try count, since it's more effecient for
# django querysets
total_to_index += items.count()
except TypeError:
# if count errors because we have a list,
# use len
# first check for method to provide
# counts for non-models
total_to_index += model.total_to_index()
except (AttributeError, NotImplementedError):
# if count errors because we have a non-model
# indexable or a list, fall back to len
total_to_index += len(items)

# initialize progressbar if requested and indexing more than 5 items
Expand All @@ -147,7 +144,8 @@ def handle(self, *args, **kwargs):
for name, model in self.indexables.items():
if self.options['index'] in [name, 'all']:
# index in chunks and update progress bar
count += self.index(model.items_to_index(), progbar=progbar)
count += self.index(model.items_to_index(),
progbar=progbar)

if progbar:
progbar.finish()
Expand Down
64 changes: 62 additions & 2 deletions parasolr/pytest_plugin.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import logging
from time import sleep
from unittest.mock import MagicMock, Mock

import pytest

Expand All @@ -11,11 +13,13 @@
django = None

import parasolr.django as parasolr_django
from parasolr.query.queryset import SolrQuerySet
from parasolr.schema import SolrSchema


logger = logging.getLogger(__name__)


# NOTE: pytest plugins must be conditionally defined to avoid errors
# (requires_django decorator does not work)
if django:
Expand All @@ -32,7 +36,7 @@ def get_test_solr_config():

# if no solr connection is configured, bail out
if not getattr(settings, 'SOLR_CONNECTIONS', None):
logger.warn('No Solr configuration found')
logger.warning('No Solr configuration found')
return

# copy default config for basic connection options (e.g. url)
Expand Down Expand Up @@ -114,5 +118,61 @@ def configure_django_test_solr():

@pytest.fixture
def empty_solr():
    '''pytest fixture to clear out all content from configured Solr'''
    parasolr_django.SolrClient().update.delete_by_query('*:*')
    # poll until Solr reports no records, i.e. the delete is visible;
    # a 0.1 second pause seems to be enough for local dev with local Solr
    while True:
        remaining = parasolr_django.SolrQuerySet().count()
        if remaining == 0:
            break
        sleep(0.1)


def get_mock_solr_queryset(spec=SolrQuerySet):
    '''Generate a mock queryset class that simulates the fluent
    interface.

    Creates a :class:`unittest.mock.MagicMock` with the given `spec` and
    configures each of the chainable queryset methods to return that
    same mock. Returns a :class:`unittest.mock.Mock` that returns
    the mock queryset when called, so it can stand in for the class.
    '''
    # methods that return a queryset and so can be chained
    fluent_methods = (
        'filter', 'facet', 'stats', 'facet_field', 'facet_range',
        'search', 'order_by', 'query', 'only', 'also',
        'highlight', 'raw_query_parameters', 'all', 'none',
    )

    queryset = MagicMock(spec=spec)
    for name in fluent_methods:
        chained = getattr(queryset, name)
        chained.return_value = queryset

    return Mock(return_value=queryset)


@pytest.fixture
def mock_solr_queryset(request):
    '''Fixture to provide a :class:`unittest.mock.Mock` for
    :class:`~parasolr.query.queryset.SolrQuerySet` that simplifies
    testing against a mocked version of the fluent interface. It returns
    a method to generate a Mock queryset class; the method has an
    optional parameter for a queryset subclass to use for the `spec`
    argument to Mock.

    If called from a class or function where the request provides access
    to a class, the mock generator method `mock_solr_queryset` will be
    added to the class as a static method.

    Example use::

        @pytest.mark.usefixtures("mock_solr_queryset")
        class MyTestCase(TestCase):

            def test_my_solr_method(self):
                with patch('parasolr.query.queryset.SolrQuerySet',
                           new=self.mock_solr_queryset()) as mock_queryset_cls:
                    # ... run the code under test here ...
                    mock_qs = mock_queryset_cls.return_value
                    mock_qs.search.assert_any_call(text='my test search')

    To use with a custom queryset subclass::

        mock_qs = self.mock_solr_queryset(MySolrQuerySet)

    '''

    # when the fixture is requested at class or function scope and a
    # test class is available, expose the mock generator on that class
    # as a static method
    if request.scope in ('class', 'function'):
        test_cls = getattr(request, 'cls', None)
        if test_cls:
            test_cls.mock_solr_queryset = \
                staticmethod(get_mock_solr_queryset)
    return get_mock_solr_queryset
35 changes: 18 additions & 17 deletions parasolr/query/queryset.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def _set_highlighting_opts(self, query_opts: Dict) -> None:
def _set_faceting_opts(self, query_opts: Dict) -> None:
"""Configure faceting attributes directly on query_opts. Modifies
dictionary directly."""
if self.facet_field_list or self.range_facet_fields:
if self.facet_field_list or self.range_facet_fields or self.facet_opts:
query_opts.update({
'facet': True,
'facet.field': self.facet_field_list,
Expand Down Expand Up @@ -168,7 +168,7 @@ def count(self) -> int:
"""Total number of results for the current query"""

# if result cache is already populated, use it
if self._result_cache is not None:
if self._result_cache:
return self._result_cache.numFound

# otherwise, query with current options but request zero rows
Expand All @@ -191,15 +191,11 @@ def get_facets(self) -> Dict[str, Dict]:
Solr response. Includes facet fields, facet ranges, etc. Facet
field results are returned as an ordered dict of value and count.
"""
if self._result_cache is not None:
# wrap to process facets and return as dictionary
# for Django template support
qr = QueryResponse(self._result_cache)
# NOTE: using dictionary syntax preserves OrderedDict
return qr.facet_counts
if self._result_cache:
return self._result_cache.facet_counts

# since we just want a dictionary of facet fields, don't populate
# the result cache, no rows needed

query_opts = self.query_opts()
query_opts['rows'] = 0
query_opts['hl'] = False
Expand All @@ -213,17 +209,22 @@ def get_facets(self) -> Dict[str, Dict]:
def get_stats(self) -> Optional[Dict[str, ParasolrDict]]:
    """Return a dictionary of stats information in Solr format or None
    on error.

    If the result cache is populated, stats are read from the cached
    response. Otherwise Solr is queried with the options returned by
    :meth:`query_opts`, overridden to request zero rows and no
    highlighting; the result cache is not populated.
    """
    if self._result_cache:
        return self._result_cache.stats

    # start from the current query options so the stats reflect this
    # queryset rather than an unconfigured query; no rows or
    # highlighting are needed just to get stats
    query_opts = self.query_opts()
    query_opts['rows'] = 0
    query_opts['hl'] = False

    response = self.solr.query(**query_opts)
    if response:
        return response.stats
    return None

def get_expanded(self) -> Dict[str, Dict]:
    """Return the expanded records included in the Solr response.

    Uses the cached response when one is available; otherwise
    calls :meth:`get_results` first and then reads the expanded
    records from the result cache.
    """
    if self._result_cache:
        return self._result_cache.expanded

    # nothing cached yet; run the query, then read from the cache
    self.get_results()
    return self._result_cache.expanded

@staticmethod
def _lookup_to_filter(key: str, value: Any, tag: str = '') -> str:
"""Convert keyword/value argument, with optional lookups separated by
Expand Down Expand Up @@ -590,7 +591,7 @@ def __getitem__(self, k):

# if the result cache is already populated,
# return the requested index or slice
if self._result_cache is not None:
if self._result_cache:
return self._result_cache.docs[k]

qs_copy = self._clone()
Expand Down

0 comments on commit 1f5af15

Please sign in to comment.