From 7cb57d194b5ec874bea0a71e43d4a65939c2a5d1 Mon Sep 17 00:00:00 2001
From: Joe Amditis <amditisj@mail.montclair.edu>
Date: Tue, 24 Mar 2026 15:32:05 -0400
Subject: [PATCH 1/2] fix: return next=None on last page of search results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For page-number pagination, request per_page + 1 rows from Solr and
use the extra row's presence to determine if more results exist. This
is more reliable than using results.hits which can be approximate.

The cursor pagination path is unchanged — Solr's nextCursorMark
equality check already correctly detects the last page.

Fixes MuckRock/documentcloud#372
---
 documentcloud/documents/search.py            | 19 +++++++++++------
 documentcloud/documents/tests/test_search.py | 22 ++++++++++++++++++++
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/documentcloud/documents/search.py b/documentcloud/documents/search.py
index facc8e98..61142988 100644
--- a/documentcloud/documents/search.py
+++ b/documentcloud/documents/search.py
@@ -3,7 +3,6 @@
 from django.http.request import QueryDict
 
 # Standard Library
-import math
 import re
 from collections import defaultdict
 from datetime import datetime
@@ -596,7 +595,8 @@ def _paginate_page(query_params, user):
             f"The selected `page` value of {page} is over the limit of {max_page}"
         )
     start = (page - 1) * rows
-    return {"rows": rows, "start": start}, {"page": page, "rows": rows}
+    # Request one extra row to detect whether a next page exists
+    return {"rows": rows + 1, "start": start}, {"page": page, "rows": rows}
 
 
 def _paginate_cursor(query_params, user):
@@ -615,7 +615,7 @@ def _paginate_cursor(query_params, user):
         CursorPagination.page_size,
         max_value=max_page_size,
     )
-    return {"rows": rows, "cursorMark": cursor}, {}
+    return {"rows": rows, "cursorMark": cursor}, {"rows": rows}
 
 
 def _format_response(results, query_params, user, escaped, page_data):
@@ -623,11 +623,16 @@ def _format_response(results, query_params, user, escaped, page_data):
     base_url = f"{settings.DOCCLOUD_API_URL}/api/documents/search/"
     query_params = query_params.copy()
 
+    per_page = page_data["rows"]
+
     if "page" in page_data:
         page = page_data["page"]
-        per_page = page_data["rows"]
-        max_page = math.ceil(results.hits / per_page)
-        if page < max_page:
+        # For page-number pagination, we requested per_page + 1 rows to
+        # detect whether more results exist without relying on hit counts.
+        has_more = len(results.docs) > per_page
+        results.docs = results.docs[:per_page]
+
+        if has_more:
             query_params["page"] = page + 1
             next_url = f"{base_url}?{query_params.urlencode()}"
         else:
@@ -639,6 +644,8 @@ def _format_response(results, query_params, user, escaped, page_data):
         else:
             previous_url = None
     else:
+        # For cursor pagination, Solr's nextCursorMark equality check
+        # reliably detects the last page without needing the N+1 trick.
         if query_params.get("cursor", "*") == results.nextCursorMark:
             next_url = None
         else:
diff --git a/documentcloud/documents/tests/test_search.py b/documentcloud/documents/tests/test_search.py
index 376b6446..315908eb 100644
--- a/documentcloud/documents/tests/test_search.py
+++ b/documentcloud/documents/tests/test_search.py
@@ -167,6 +167,28 @@ def test_search_page(self):
         response = self.search(str(url.query))
         self.assert_documents(response["results"], slice_=slice(10, 20))
 
+    def test_search_last_page_next_is_none(self):
+        """Test that the last page of results has next=None"""
+
+        # With 11 documents and per_page=10, page 1 should have next
+        response = self.search("")
+        assert response["next"] is not None
+        assert response["count"] == 11
+
+        # Page 2 (the last page) should have next=None
+        url = furl(response["next"])
+        response = self.search(str(url.query))
+        assert response["next"] is None
+        assert len(response["results"]) == 1
+
+    def test_search_exact_page_boundary_next_is_none(self):
+        """Test that next=None when results exactly fill the page"""
+
+        # With 11 documents and per_page=11, all results fit on one page
+        response = self.search("per_page=11")
+        assert response["next"] is None
+        assert len(response["results"]) == 11
+
     def test_search_per_page(self):
         """Test fetching a different number of results per page"""
 

From 767c4185b5f233cfb96ac85a364b3c50fc1bbf63 Mon Sep 17 00:00:00 2001
From: Joe Amditis <amditisj@mail.montclair.edu>
Date: Tue, 24 Mar 2026 15:54:00 -0400
Subject: [PATCH 2/2] fix: clamp probe row to max_page_size

Copilot review: rows + 1 could exceed max_page_size when the client
requests the maximum per_page. Now only probes with +1 when
rows < max_page_size; falls back to hit-count check at the cap.
---
 documentcloud/documents/search.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/documentcloud/documents/search.py b/documentcloud/documents/search.py
index 61142988..05728676 100644
--- a/documentcloud/documents/search.py
+++ b/documentcloud/documents/search.py
@@ -3,6 +3,7 @@
 from django.http.request import QueryDict
 
 # Standard Library
+import math
 import re
 from collections import defaultdict
 from datetime import datetime
@@ -595,8 +596,10 @@ def _paginate_page(query_params, user):
             f"The selected `page` value of {page} is over the limit of {max_page}"
         )
     start = (page - 1) * rows
-    # Request one extra row to detect whether a next page exists
-    return {"rows": rows + 1, "start": start}, {"page": page, "rows": rows}
+    # Request one extra row to detect whether a next page exists,
+    # but never exceed max_page_size
+    solr_rows = rows + 1 if rows < max_page_size else rows
+    return {"rows": solr_rows, "start": start}, {"page": page, "rows": rows}
 
 
 def _paginate_cursor(query_params, user):
@@ -627,10 +630,13 @@ def _format_response(results, query_params, user, escaped, page_data):
 
     if "page" in page_data:
         page = page_data["page"]
-        # For page-number pagination, we requested per_page + 1 rows to
-        # detect whether more results exist without relying on hit counts.
-        has_more = len(results.docs) > per_page
-        results.docs = results.docs[:per_page]
+        # When we could request per_page + 1, use the extra row to detect
+        # whether more results exist. Otherwise fall back to hit counts.
+        if len(results.docs) > per_page:
+            has_more = True
+            results.docs = results.docs[:per_page]
+        else:
+            has_more = page < math.ceil(results.hits / per_page)
 
         if has_more:
             query_params["page"] = page + 1