Merge pull request #45 from Princeton-CDH/feature/ppa-support

Feature/ppa support
Princeton-CDH · Sep 9, 2020 · 1f5af15 · 1f5af15
2 parents c8267cf + 99fa473
commit 1f5af15
Show file tree

Hide file tree

Showing 17 changed files with 404 additions and 90 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -3,6 +3,20 @@
 CHANGELOG
 =========
 
+0.6
+---
+
+* Solr client now escalates 404 errors instead of logging with no exception
+* Schema field declarations now support the `stored` option
+* Schema field type declarations now pass through arbitrary options
+* New method `total_to_index` on `parasolr.indexing.Indexable` to better
+  support indexing content that is returned as a generator
+* Access to expanded results now available on QueryResponse and SolrQuerySet
+* SolrQuerySet no longer wraps return results from `get_stats` and `get_facets` with QueryResponse
+* New last-modified view mixin for use with Django views `parasolr.django.views.SolrLastModifiedMixin`
+* New pytest fixture `mock_solr_queryset` to generate a Mock SolrQuerySet that simulates the SolrQuerySet fluent interface
+
+
 0.5.4
 -----
 

diff --git a/parasolr/django/signals.py b/parasolr/django/signals.py
@@ -122,8 +122,8 @@ def connect():
         for model, options in ModelIndexable.related:
             for signal_name, handler in options.items():
                 model_signal = getattr(models.signals, signal_name)
-                logger.debug('Registering %s signal handler for %s',
-                             signal_name, model)
+                logger.debug('Registering %s signal handler %s for %s',
+                             handler, signal_name, model)
                 model_signal.connect(handler, sender=model)
 
     @staticmethod

diff --git a/parasolr/django/views.py b/parasolr/django/views.py
@@ -0,0 +1,73 @@
+import calendar
+import logging
+
+from django.utils.cache import get_conditional_response
+from django.views.generic.base import View
+
+from parasolr.django import SolrQuerySet
+from parasolr.solr import SolrClientException
+from parasolr.utils import solr_timestamp_to_datetime
+
+
+logger = logging.getLogger(__name__)
+
+
+class SolrLastModifiedMixin(View):
+    """View mixin to add last modified headers based on Solr.
+    By default, searches entire solr collection and returns the most
+    recent last modified value (assumes **last_modified** field).
+    To filter for items specific to your view, either
+    set :attr:`solr_lastmodified_filters` or
+    implement :meth:`get_solr_lastmodified_filters`.
+    """
+
+    #: solr query filter for getting last modified date
+    solr_lastmodified_filters = {}   # by default, find all
+
+    def get_solr_lastmodified_filters(self):
+        '''Get filters for last modified Solr query. By default returns
+        :attr:`solr_lastmodified_filters`.'''
+        return self.solr_lastmodified_filters
+
+    def last_modified(self):
+        '''Return last modified :class:`datetime.datetime` from the
+        specified Solr query'''
+        filter_qs = self.get_solr_lastmodified_filters()
+        sqs = SolrQuerySet().filter(**filter_qs) \
+            .order_by('-last_modified').only('last_modified')
+
+        try:
+            # Solr stores date in isoformat; convert to datetime
+            return solr_timestamp_to_datetime(sqs[0]['last_modified'])
+            # skip extra call to Solr to check count and just grab the first
+            # item if it exists
+        except (IndexError, KeyError, SolrClientException) as err:
+            # if a syntax or other solr error happens, no date to return
+            # report the error, but don't fail since the view may still
+            # be able to render normally
+            logger.error('Failed to retrieve last modified: %s' % err)
+            # TODO: if possible, report the view / args / url that triggered
+            # the error
+
+    def dispatch(self, request, *args, **kwargs):
+        '''Wrap the dispatch method to add a last modified header if
+        one is available, then return a conditional response.'''
+
+        # NOTE: dispatching first doesn't skip any view processing, but
+        # without it we could return 304 Not Modified for a non-200 response
+        response = super(SolrLastModifiedMixin, self) \
+            .dispatch(request, *args, **kwargs)
+
+        last_modified = self.last_modified()
+        if last_modified:
+            # remove microseconds so that comparison will pass,
+            # since microseconds are not included in the last-modified header
+            last_modified = last_modified.replace(microsecond=0)
+            response['Last-Modified'] = last_modified \
+                .strftime('%a, %d %b %Y %H:%M:%S GMT')
+            # convert the same way django does so that they will
+            # compare correctly
+            last_modified = calendar.timegm(last_modified.utctimetuple())
+
+        return get_conditional_response(request, last_modified=last_modified,
+                                        response=response)
diff --git a/parasolr/indexing.py b/parasolr/indexing.py
@@ -39,6 +39,13 @@ def all_subclasses(cls):
 class Indexable:
     """Mixin for objects that are indexed in Solr.  Subclasses must implement
     `index_id` and `index` methods.
+
+    When implementing an Indexable subclass where items_to_index
+    returns something like a generator, which neither exposes a
+    `count` method nor can be counted with `len`, you should
+    implement `total_to_index` and return the number of items
+    to be indexed, for use with the Django index manage
+    command.
     """
 
     # NOTE: current implementation is Django-specific, intended for
@@ -78,6 +85,8 @@ def index_item_type(cls):
         across all Indexable items in an application. By default, uses
         Django model verbose name. Used in default index id and
         in index manage command. """
+        # TODO: move this implementation into django subclass?
+        # default could just return an attribute on the class
         return cls._meta.verbose_name
 
     @classmethod
@@ -92,6 +101,17 @@ def items_to_index(cls):
         except AttributeError:
             raise NotImplementedError
 
+    @classmethod
+    def total_to_index(cls):
+        """Get the total number of items to be indexed for a single class of
+        Indexable content. Subclasses should override this method
+        if necessary. By default, returns a Django queryset count for a model.
+        Raises NotImplementedError if that fails."""
+        try:
+            return cls.objects.count()
+        except AttributeError:
+            raise NotImplementedError
+
     def index_id(self):
         """Solr identifier. By default, combines :meth:`index item_type`
         and :attr:`id` with :attr:ID_SEPARATOR`."""
@@ -108,8 +128,7 @@ def index_data(self):
         }
 
     def index(self):
-        """Index the current object in Solr.  Allows passing in
-        parameter, e.g. to set a `commitWithin` value.
+        """Index the current object in Solr.
         """
         self.solr.update.index([self.index_data()])
 

diff --git a/parasolr/management/commands/index.py b/parasolr/management/commands/index.py
@@ -116,18 +116,15 @@ def handle(self, *args, **kwargs):
             # calculate total to index across all indexables for current mode
             for name, model in self.indexables.items():
                 if self.options['index'] in [name, 'all']:
-                    # possibly inefficient to generate the list just
-                    # for a count; should be ok for django queryset implementation,
-                    # hopefully not too bad for other cases
                     items = model.items_to_index()
                     if items:
                         try:
-                            # try count, since it's more effecient for
-                            # django querysets
-                            total_to_index += items.count()
-                        except TypeError:
-                            # if count errors because we have a list,
-                            # use len
+                            # first check for method to provide
+                            # counts for non-models
+                            total_to_index += model.total_to_index()
+                        except (AttributeError, NotImplementedError):
+                            # if count errors because we have a non-model
+                            # indexable or a list, fall back to len
                             total_to_index += len(items)
 
         # initialize progressbar if requested and indexing more than 5 items
@@ -147,7 +144,8 @@ def handle(self, *args, **kwargs):
             for name, model in self.indexables.items():
                 if self.options['index'] in [name, 'all']:
                     # index in chunks and update progress bar
-                    count += self.index(model.items_to_index(), progbar=progbar)
+                    count += self.index(model.items_to_index(),
+                                        progbar=progbar)
 
         if progbar:
             progbar.finish()

diff --git a/parasolr/pytest_plugin.py b/parasolr/pytest_plugin.py
@@ -1,4 +1,6 @@
 import logging
+from time import sleep
+from unittest.mock import MagicMock, Mock
 
 import pytest
 
@@ -11,11 +13,13 @@
     django = None
 
 import parasolr.django as parasolr_django
+from parasolr.query.queryset import SolrQuerySet
 from parasolr.schema import SolrSchema
 
 
 logger = logging.getLogger(__name__)
 
+
 # NOTE: pytest plugins must be conditionally defined to avoid errors
 # (requires_django decorator does not work)
 if django:
@@ -32,7 +36,7 @@ def get_test_solr_config():
 
         # if no solr connection is configured, bail out
         if not getattr(settings, 'SOLR_CONNECTIONS', None):
-            logger.warn('No Solr configuration found')
+            logger.warning('No Solr configuration found')
             return
 
         # copy default config for basic connection options (e.g. url)
@@ -114,5 +118,61 @@ def configure_django_test_solr():
 
     @pytest.fixture
     def empty_solr():
-        # pytest solr fixture; updates solr schema
+        '''pytest fixture to clear out all content from configured Solr'''
         parasolr_django.SolrClient().update.delete_by_query('*:*')
+        while(parasolr_django.SolrQuerySet().count() != 0):
+            # sleep until Solr reports no records; 0.1 seems to be enough
+            # for local dev with local Solr
+            sleep(0.1)
+
+
+def get_mock_solr_queryset(spec=SolrQuerySet):
+    mock_qs = MagicMock(spec=spec)
+
+    # simulate fluent interface
+    for meth in ['filter', 'facet', 'stats', 'facet_field', 'facet_range',
+                 'search', 'order_by', 'query', 'only', 'also',
+                 'highlight', 'raw_query_parameters', 'all', 'none']:
+        getattr(mock_qs, meth).return_value = mock_qs
+
+    return Mock(return_value=mock_qs)
+
+
+@pytest.fixture
+def mock_solr_queryset(request):
+    '''Fixture to provide a :class:`unittest.mock.Mock` for
+    :class:`~parasolr.query.queryset.SolrQuerySet` that simplifies
+    testing against a mocked version of the fluent interface. It returns
+    a method to generate a Mock queryset class; the method has an
+    optional parameter for a queryset subclass to use for the `spec`
+    argument to Mock.
+
+    If called from a class or function where the request provides access
+    to a class, the mock generator method `mock_solr_queryset` will be
+    added to the class as a static method.
+
+    Example use::
+
+        @pytest.mark.usefixtures("mock_solr_queryset")
+        class MyTestCase(TestCase):
+
+            def test_my_solr_method(self):
+
+                with patch('parasolr.query.queryset.SolrQuerySet',
+                       new=self.mock_solr_queryset()) as mock_queryset_cls:
+
+                    mock_qs = mock_queryset_cls.return_value
+                    mock_qs.search.assert_any_call(text='my test search')
+
+    To use with a custom queryset subclass::
+
+        mock_qs = self.mock_solr_queryset(MySolrQuerySet)
+
+    '''
+
+    # if scope is class or function and there is a class available,
+    # convert the mock generator to a static method and set it on the class
+    if request.scope in ['class', 'function'] and \
+       getattr(request, 'cls', None):
+        request.cls.mock_solr_queryset = staticmethod(get_mock_solr_queryset)
+    return get_mock_solr_queryset
diff --git a/parasolr/query/queryset.py b/parasolr/query/queryset.py
@@ -100,7 +100,7 @@ def _set_highlighting_opts(self, query_opts: Dict) -> None:
     def _set_faceting_opts(self, query_opts: Dict) -> None:
         """Configure faceting attributes directly on query_opts. Modifies
         dictionary directly."""
-        if self.facet_field_list or self.range_facet_fields:
+        if self.facet_field_list or self.range_facet_fields or self.facet_opts:
             query_opts.update({
                 'facet': True,
                 'facet.field': self.facet_field_list,
@@ -168,7 +168,7 @@ def count(self) -> int:
         """Total number of results for the current query"""
 
         # if result cache is already populated, use it
-        if self._result_cache is not None:
+        if self._result_cache:
             return self._result_cache.numFound
 
         # otherwise, query with current options but request zero rows
@@ -191,15 +191,11 @@ def get_facets(self) -> Dict[str, Dict]:
         Solr response. Includes facet fields, facet ranges, etc. Facet
         field results are returned as an ordered dict of value and count.
         """
-        if self._result_cache is not None:
-            # wrap to process facets and return as dictionary
-            # for Django template support
-            qr = QueryResponse(self._result_cache)
-            # NOTE: using dictionary syntax preserves OrderedDict
-            return qr.facet_counts
+        if self._result_cache:
+            return self._result_cache.facet_counts
+
         # since we just want a dictionary of facet fields, don't populate
         # the result cache, no rows needed
-
         query_opts = self.query_opts()
         query_opts['rows'] = 0
         query_opts['hl'] = False
@@ -213,17 +209,22 @@ def get_facets(self) -> Dict[str, Dict]:
     def get_stats(self) -> Optional[Dict[str, ParasolrDict]]:
         """Return a dictionary of stats information in Solr format or None
         on error."""
-        if self._result_cache is not None:
-            qr = QueryResponse(self._result_cache)
-            return qr.stats
-        query_opts = self.query_opts()
-        query_opts['rows'] = 0
-        query_opts['hl'] = False
+        if self._result_cache:
+            return self._result_cache.stats
 
-        response = self.solr.query(**query_opts)
+        response = self.solr.query(rows=0, hl=False)
         if response:
             return response.stats
 
+    def get_expanded(self) -> Dict[str, Dict]:
+        """Return a dictionary of expanded records included in the
+        Solr response.
+        """
+        if not self._result_cache:
+            self.get_results()
+
+        return self._result_cache.expanded
+
     @staticmethod
     def _lookup_to_filter(key: str, value: Any, tag: str = '') -> str:
         """Convert keyword/value argument, with optional lookups separated by
@@ -590,7 +591,7 @@ def __getitem__(self, k):
 
         # if the result cache is already populated,
         # return the requested index or slice
-        if self._result_cache is not None:
+        if self._result_cache:
             return self._result_cache.docs[k]
 
         qs_copy = self._clone()