From 7cb57d194b5ec874bea0a71e43d4a65939c2a5d1 Mon Sep 17 00:00:00 2001 From: Joe Amditis Date: Tue, 24 Mar 2026 15:32:05 -0400 Subject: [PATCH 1/2] fix: return next=None on last page of search results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For page-number pagination, request per_page + 1 rows from Solr and use the extra row's presence to determine if more results exist. This is more reliable than using results.hits which can be approximate. The cursor pagination path is unchanged — Solr's nextCursorMark equality check already correctly detects the last page. Fixes MuckRock/documentcloud#372 --- documentcloud/documents/search.py | 19 +++++++++++------ documentcloud/documents/tests/test_search.py | 22 ++++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/documentcloud/documents/search.py b/documentcloud/documents/search.py index facc8e98..61142988 100644 --- a/documentcloud/documents/search.py +++ b/documentcloud/documents/search.py @@ -3,7 +3,6 @@ from django.http.request import QueryDict # Standard Library -import math import re from collections import defaultdict from datetime import datetime @@ -596,7 +595,8 @@ def _paginate_page(query_params, user): f"The selected `page` value of {page} is over the limit of {max_page}" ) start = (page - 1) * rows - return {"rows": rows, "start": start}, {"page": page, "rows": rows} + # Request one extra row to detect whether a next page exists + return {"rows": rows + 1, "start": start}, {"page": page, "rows": rows} def _paginate_cursor(query_params, user): @@ -615,7 +615,7 @@ def _paginate_cursor(query_params, user): CursorPagination.page_size, max_value=max_page_size, ) - return {"rows": rows, "cursorMark": cursor}, {} + return {"rows": rows, "cursorMark": cursor}, {"rows": rows} def _format_response(results, query_params, user, escaped, page_data): @@ -623,11 +623,16 @@ def _format_response(results, query_params, user, escaped, page_data): base_url = f"{settings.DOCCLOUD_API_URL}/api/documents/search/" query_params = query_params.copy() + per_page = page_data["rows"] + if "page" in page_data: page = page_data["page"] - per_page = page_data["rows"] - max_page = math.ceil(results.hits / per_page) - if page < max_page: + # For page-number pagination, we requested per_page + 1 rows to + # detect whether more results exist without relying on hit counts. + has_more = len(results.docs) > per_page + results.docs = results.docs[:per_page] + + if has_more: query_params["page"] = page + 1 next_url = f"{base_url}?{query_params.urlencode()}" else: @@ -639,6 +644,8 @@ def _format_response(results, query_params, user, escaped, page_data): else: previous_url = None else: + # For cursor pagination, Solr's nextCursorMark equality check + # reliably detects the last page without needing the N+1 trick. if query_params.get("cursor", "*") == results.nextCursorMark: next_url = None else: diff --git a/documentcloud/documents/tests/test_search.py b/documentcloud/documents/tests/test_search.py index 376b6446..315908eb 100644 --- a/documentcloud/documents/tests/test_search.py +++ b/documentcloud/documents/tests/test_search.py @@ -167,6 +167,28 @@ def test_search_page(self): response = self.search(str(url.query)) self.assert_documents(response["results"], slice_=slice(10, 20)) + def test_search_last_page_next_is_none(self): + """Test that the last page of results has next=None""" + + # With 11 documents and per_page=10, page 1 should have next + response = self.search("") + assert response["next"] is not None + assert response["count"] == 11 + + # Page 2 (the last page) should have next=None + url = furl(response["next"]) + response = self.search(str(url.query)) + assert response["next"] is None + assert len(response["results"]) == 1 + + def test_search_exact_page_boundary_next_is_none(self): + """Test that next=None when results exactly fill the page""" + + # With 11 documents and per_page=11, all results fit on one page + response = self.search("per_page=11") + assert response["next"] is None + assert len(response["results"]) == 11 + def test_search_per_page(self): """Test fetching a different number of results per page""" From 767c4185b5f233cfb96ac85a364b3c50fc1bbf63 Mon Sep 17 00:00:00 2001 From: Joe Amditis Date: Tue, 24 Mar 2026 15:54:00 -0400 Subject: [PATCH 2/2] fix: clamp probe row to max_page_size Copilot review: rows + 1 could exceed max_page_size when the client requests the maximum per_page. Now only probes with +1 when rows < max_page_size; falls back to hit-count check at the cap. --- documentcloud/documents/search.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/documentcloud/documents/search.py b/documentcloud/documents/search.py index 61142988..05728676 100644 --- a/documentcloud/documents/search.py +++ b/documentcloud/documents/search.py @@ -3,6 +3,7 @@ from django.http.request import QueryDict # Standard Library +import math import re from collections import defaultdict from datetime import datetime @@ -595,8 +596,10 @@ def _paginate_page(query_params, user): f"The selected `page` value of {page} is over the limit of {max_page}" ) start = (page - 1) * rows - # Request one extra row to detect whether a next page exists - return {"rows": rows + 1, "start": start}, {"page": page, "rows": rows} + # Request one extra row to detect whether a next page exists, + # but never exceed max_page_size + solr_rows = rows + 1 if rows < max_page_size else rows + return {"rows": solr_rows, "start": start}, {"page": page, "rows": rows} def _paginate_cursor(query_params, user): @@ -627,10 +630,13 @@ def _format_response(results, query_params, user, escaped, page_data): if "page" in page_data: page = page_data["page"] - # For page-number pagination, we requested per_page + 1 rows to - # detect whether more results exist without relying on hit counts. - has_more = len(results.docs) > per_page - results.docs = results.docs[:per_page] + # When we could request per_page + 1, use the extra row to detect + # whether more results exist. Otherwise fall back to hit counts. + if len(results.docs) > per_page: + has_more = True + results.docs = results.docs[:per_page] + else: + has_more = page < math.ceil(results.hits / per_page) if has_more: query_params["page"] = page + 1