Skip to content

Commit

Permalink
Merge pull request #13 from NextThought/issue12
Browse files Browse the repository at this point in the history
Make ExtentFilteredSet.index_doc only unindex values that were in the set
  • Loading branch information
jamadden committed May 13, 2021
2 parents b27fd1a + d29027f commit 940028c
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 4 deletions.
4 changes: 3 additions & 1 deletion CHANGES.rst
Expand Up @@ -5,7 +5,9 @@
3.0.1 (unreleased)
==================

- Nothing changed yet.
- Fix the ``ExtentFilteredSet`` to only unindex documents that were
previously indexed. This avoids an extra ``readCurrent`` call. See
`issue 12 <https://github.com/NextThought/nti.zope_catalog/issues/12>`_.


3.0.0 (2021-05-12)
Expand Down
38 changes: 36 additions & 2 deletions src/nti/zope_catalog/tests/test_topic.py
Expand Up @@ -8,13 +8,15 @@
# stdlib imports
import unittest


from hamcrest import assert_that
from hamcrest import is_

from zope.index.topic.filter import PythonFilteredSet

from nti.zope_catalog.topic import ExtentFilteredSet
from nti.zope_catalog.topic import TopicIndex

from hamcrest import assert_that
from hamcrest import is_

__docformat__ = "restructuredtext en"

Expand Down Expand Up @@ -123,3 +125,35 @@ def test_ids_and_extent(self):
extent_extent = extent.getExtent()
assert_that(list(extent_extent.set),
is_([1]))

def test_index_doc_doesnt_unindex_on_ValueError(self):
extent = ExtentFilteredSet('extent', default_expression)
in_none = Context(docid=4)
extent.unindex_doc = lambda *args: self.fail("Should not call")
extent.index_doc(in_none.docid, in_none)

assert_that(extent.ids(), is_(()))

def test_index_doc_doesnt_unindex_on_ValueError_with_jar(self):
extent = ExtentFilteredSet('extent', default_expression)


in_extent = Context(in_extent=True, docid=1)
in_none = Context(docid=4)
# The Python implementation of BTrees always calls readCurrent, but
# the C implementation doesn't do it if its empty.
extent.index_doc(in_extent.docid, in_extent)

# Now break the jar
class MockJar(object):
def readCurrent(self, _):
raise AssertionError("Should not call") # pragma: no cover

extent._ids._p_jar = MockJar()
extent._ids._p_oid = b'12345678'
extent._extent._p_jar = MockJar()
extent._extent._p_oid = b'12345678'

extent.index_doc(in_none.docid, in_none)

assert_that(extent.ids(), is_((1,)))
11 changes: 10 additions & 1 deletion src/nti/zope_catalog/topic.py
Expand Up @@ -130,7 +130,16 @@ def index_doc(self, docid, context):
try:
self._extent.add(docid, context)
except ValueError:
self.unindex_doc(docid)
# Only unindex if it was found in the index to start with.
# Trying to remove a missing docid still leads to readCurrent()
# being called on the underlying BTree set unnecessarily.
# This leads to traversing the BTree twice in the uncommon case
# of removing an existing object, but that's fine.
# (TODO: Can there be persistence errors that break `in` but let
# `remove()` keep working?)
# See https://github.com/NextThought/nti.zope_catalog/issues/12
if docid in self._ids:
self.unindex_doc(docid)

def getExtent(self):
"""
Expand Down

0 comments on commit 940028c

Please sign in to comment.