diff --git a/CHANGES.rst b/CHANGES.rst index 320393b..3f2889c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,9 @@ 3.0.1 (unreleased) ================== -- Nothing changed yet. +- Fix the ``ExtentFilteredSet`` to only unindex documents that were + previously indexed. This avoids an extra ``readCurrent`` call. See + `issue 12 `_. 3.0.0 (2021-05-12) diff --git a/src/nti/zope_catalog/tests/test_topic.py b/src/nti/zope_catalog/tests/test_topic.py index 5ba1d37..7415830 100644 --- a/src/nti/zope_catalog/tests/test_topic.py +++ b/src/nti/zope_catalog/tests/test_topic.py @@ -8,13 +8,15 @@ # stdlib imports import unittest + +from hamcrest import assert_that +from hamcrest import is_ + from zope.index.topic.filter import PythonFilteredSet from nti.zope_catalog.topic import ExtentFilteredSet from nti.zope_catalog.topic import TopicIndex -from hamcrest import assert_that -from hamcrest import is_ __docformat__ = "restructuredtext en" @@ -123,3 +125,35 @@ def test_ids_and_extent(self): extent_extent = extent.getExtent() assert_that(list(extent_extent.set), is_([1])) + + def test_index_doc_doesnt_unindex_on_ValueError(self): + extent = ExtentFilteredSet('extent', default_expression) + in_none = Context(docid=4) + extent.unindex_doc = lambda *args: self.fail("Should not call") + extent.index_doc(in_none.docid, in_none) + + assert_that(extent.ids(), is_(())) + + def test_index_doc_doesnt_unindex_on_ValueError_with_jar(self): + extent = ExtentFilteredSet('extent', default_expression) + + + in_extent = Context(in_extent=True, docid=1) + in_none = Context(docid=4) + # The Python implementation of BTrees always calls readCurrent, but + # the C implementation doesn't do it if its empty. + extent.index_doc(in_extent.docid, in_extent) + + # Now break the jar + class MockJar(object): + def readCurrent(self, _): + raise AssertionError("Should not call") # pragma: no cover + + extent._ids._p_jar = MockJar() + extent._ids._p_oid = b'12345678' + extent._extent._p_jar = MockJar() + extent._extent._p_oid = b'12345678' + + extent.index_doc(in_none.docid, in_none) + + assert_that(extent.ids(), is_((1,))) diff --git a/src/nti/zope_catalog/topic.py b/src/nti/zope_catalog/topic.py index f63850d..ce432bc 100644 --- a/src/nti/zope_catalog/topic.py +++ b/src/nti/zope_catalog/topic.py @@ -130,7 +130,16 @@ def index_doc(self, docid, context): try: self._extent.add(docid, context) except ValueError: - self.unindex_doc(docid) + # Only unindex if it was found in the index to start with. + # Trying to remove a missing docid still leads to readCurrent() + # being called on the underlying BTree set unnecessarily. + # This leads to traversing the BTree twice in the uncommon case + # of removing an existing object, but that's fine. + # (TODO: Can there be persistence errors that break `in` but let + # `remove()` keep working?) + # See https://github.com/NextThought/nti.zope_catalog/issues/12 + if docid in self._ids: + self.unindex_doc(docid) def getExtent(self): """