From d4ff58def612a6a0d042ec9fc9d07778e03bb980 Mon Sep 17 00:00:00 2001 From: mauromsl Date: Wed, 23 Feb 2022 19:51:40 +0100 Subject: [PATCH 1/4] #2768: OAI resumptionToken to support query params --- src/api/oai/base.py | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/src/api/oai/base.py b/src/api/oai/base.py index 9d9f1c210..1c63ff27c 100644 --- a/src/api/oai/base.py +++ b/src/api/oai/base.py @@ -7,9 +7,10 @@ from dateutil import parser as date_parser from django.views.generic.list import BaseListView -from django.views.generic.base import TemplateResponseMixin +from django.views.generic.base import View, TemplateResponseMixin from api.oai import exceptions +from utils.http import allow_mutating_GET metadata_formats = [ { @@ -62,13 +63,39 @@ def get_context_data(self, *args, **kwargs): return context -class OAIPaginationMixin(): +class OAIPaginationMixin(View): + """ A Mixin allowing views to be paginated via OAI resumptionToken + + The resumptionToken is a query parameter that allows a consumer of the OAI + interface to resume consuming elements of a listed query when the bounds + of such list are larger than the maximum number of records allowed per + response. This is achieved by encoding the page number details in the + querystring as the resumptionToken itself. + Furthermore, any filters provided as queryparams need to be encoded + into the resumptionToken as per the spec, it is not mandatory for the + consumer to provide filters on subsequent queries for the same list. This + is addressed in the `dispatch` method where we have no option but to mutate + the self.request.GET member in order to inject those querystring filters. + """ page_kwarg = "token_page" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._decoded_token = {} + def dispatch(self, *args, **kwargs): + """ Adds resumptionToken encoded parameters into request.GET + + This makes the implementation of resumptionToken transparent to any + child views that will see all encoded filters in the resumptionToken + as GET parameters + """ + self._decode_token() + with allow_mutating_GET(self.request): + for key, value in self._decoded_token.items(): + self.request.GET[key] = value + return super().dispatch(*args, **kwargs) + def get_context_data(self, **kwargs): self.kwargs["token_page"] = self.page context = super().get_context_data(**kwargs) @@ -86,7 +113,12 @@ def get_token_context(self, context): } def encode_token(self, context): - return quote(urlencode(self.get_token_context(context))) + token_data = self.get_token_context(context) + for key, value in self.request.GET.items(): + if key != "verb": # verb is an exception as per OAI spec + token_data[key] = value + + return quote(urlencode(token_data)) def _decode_token(self): if not self._decoded_token and "resumptionToken" in self.request.GET: @@ -98,7 +130,6 @@ def _decode_token(self): @property def page(self): - self._decode_token() if self._decoded_token: return int(self._decoded_token.get("page", 1)) return None @@ -134,7 +165,7 @@ def get_token_context(self, context): @property def from_(self): self._decode_token() - if self._decoded_token and "from" in self._decoded_token: + if self._decoded_token and "from" in self._decoded_token.items(): return self._decoded_token.get("from") else: return self.request.GET.get("from") From aa9562167c6085aab465a7f803054cd45c61b12d Mon Sep 17 00:00:00 2001 From: mauromsl Date: Wed, 23 Feb 2022 20:00:32 +0100 Subject: [PATCH 2/4] #2768: Fix missing journal filter --- src/api/oai/views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/api/oai/views.py b/src/api/oai/views.py index 92c2a8e2d..202127377 100644 --- a/src/api/oai/views.py +++ b/src/api/oai/views.py @@ -48,6 +48,8 @@ def get_queryset(self): queryset = submission_models.Article.objects.filter( date_published__isnull=False, ) + if self.request.journal: + queryset = queryset.filter(journal=self.request.journal) set_filter = self.request.GET.get('set') issue_type_list = [issue_type.code for issue_type in journal_models.IssueType.objects.all()] From 4d93d66c872f05a4aa63b6795ccb4be8fc0e3e5f Mon Sep 17 00:00:00 2001 From: mauromsl Date: Wed, 23 Feb 2022 20:01:03 +0100 Subject: [PATCH 3/4] #2768: Adds unit tests for resumptionToken --- src/api/tests/test_oai.py | 51 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/src/api/tests/test_oai.py b/src/api/tests/test_oai.py index f2f4a28fd..401a00fdb 100644 --- a/src/api/tests/test_oai.py +++ b/src/api/tests/test_oai.py @@ -4,13 +4,15 @@ __license__ = "AGPL v3" __maintainer__ = "Birkbeck Centre for Technology and Publishing" -from django.test import TestCase, override_settings +from urllib.parse import unquote_plus +from django.test import RequestFactory, TestCase, override_settings from django.urls import reverse from django.utils.http import urlencode from freezegun import freeze_time from lxml import etree +from api.oai.base import OAIPaginationMixin from submission import models as sm_models from utils.testing import helpers @@ -168,6 +170,53 @@ def test_list_sets(self): self.assertEqual(str(response.rendered_content).split(), expected.split()) + @override_settings(URL_CONFIG="domain") + def test_oai_resumption_token_decode(self): + expected = {"custom-param": "custom-value"} + encoded = {"resumptionToken": urlencode(expected)} + class TestView(OAIPaginationMixin): + pass + + query_params = dict( + verb="ListRecords", + **encoded, + ) + request = RequestFactory().get("/api/oai", query_params) + TestView.as_view()(request) + self.assertEqual( + request.GET.get("custom-param"), expected["custom-param"], + "Parameters not being encoded by resumptionToken correctly", + ) + + @override_settings(URL_CONFIG="domain") + @freeze_time("1980-01-01") + def test_oai_resumption_token_encode(self): + expected = {"custom-param": "custom-value"} + expected_encoded = urlencode(expected) + for i in range(1,102): + helpers.create_submission( + journal_id=self.journal.pk, + stage=sm_models.STAGE_PUBLISHED, + date_published="1986-07-12T17:00:00.000+0200", + authors=[self.author], + ) + + path = reverse('OAI_list_records') + query_params = dict( + verb="ListRecords", + metadataPrefix="jats", + **expected, + ) + query_string = urlencode(query_params) + response = self.client.get( + f'{path}?{query_string}', + SERVER_NAME="testserver" + ) + self.assertTrue( + expected_encoded in unquote_plus(response.context["resumption_token"]), + "Query parameter has not been encoded into resumption_token", + ) + LIST_RECORDS_DATA_DC = """ From a4a388c100824c2bad1cc7927eeb39b5c361cd08 Mon Sep 17 00:00:00 2001 From: mauromsl Date: Wed, 23 Feb 2022 20:04:40 +0100 Subject: [PATCH 4/4] #2768: Adds a utility for overriding parsed GET parameters --- src/utils/http/__init__.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 src/utils/http/__init__.py diff --git a/src/utils/http/__init__.py b/src/utils/http/__init__.py new file mode 100644 index 000000000..0cfd883bf --- /dev/null +++ b/src/utils/http/__init__.py @@ -0,0 +1,16 @@ +from contextlib import ContextDecorator + + +class allow_mutating_GET(ContextDecorator): + """CAUTION: Think twice before considering using this""" + + def __init__(self, request): + self.request = request + self._mutable = self.request.GET._mutable + + def __enter__(self): + self.request.GET._mutable = True + return self + + def __exit__(self, *exc): + self.request.GET._mutable = self._mutable