Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OAI resumptionToken now considers querystring params #2772

Merged
merged 4 commits into from Feb 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
41 changes: 36 additions & 5 deletions src/api/oai/base.py
Expand Up @@ -7,9 +7,10 @@

from dateutil import parser as date_parser
from django.views.generic.list import BaseListView
from django.views.generic.base import TemplateResponseMixin
from django.views.generic.base import View, TemplateResponseMixin

from api.oai import exceptions
from utils.http import allow_mutating_GET

metadata_formats = [
{
Expand Down Expand Up @@ -62,13 +63,39 @@ def get_context_data(self, *args, **kwargs):
return context


class OAIPaginationMixin():
class OAIPaginationMixin(View):
""" A Mixin allowing views to be paginated via OAI resumptionToken

The resumptionToken is a query parameter that allows a consumer of the OAI
interface to resume consuming elements of a listed query when the bounds
of such list are larger than the maximum number of records allowed per
response. This is achieved by encoding the page number details in the
querystring as the resumptionToken itself.
Furthermore, any filters provided as query parameters need to be encoded
into the resumptionToken because, as per the spec, it is not mandatory for
the consumer to provide filters on subsequent queries for the same list.
This is addressed in the `dispatch` method, where we have no option but to
mutate the self.request.GET member in order to inject those querystring
filters.
"""
page_kwarg = "token_page"

def __init__(self, *args, **kwargs):
    """Initialise the view with an empty decoded-token cache."""
    super().__init__(*args, **kwargs)
    # Holds the parameters decoded from the resumptionToken; stays empty
    # until a token has been decoded for the current request.
    self._decoded_token = {}

def dispatch(self, *args, **kwargs):
    """Expose the parameters encoded in the resumptionToken as GET params.

    The token is decoded first and every decoded key/value pair is then
    written into ``self.request.GET``, so child views can read the
    filters as ordinary query parameters without knowing about the
    resumptionToken at all.
    """
    self._decode_token()
    # request.GET is only writable inside this context manager.
    with allow_mutating_GET(self.request):
        querystring = self.request.GET
        for name in self._decoded_token:
            querystring[name] = self._decoded_token[name]
    return super().dispatch(*args, **kwargs)

def get_context_data(self, **kwargs):
self.kwargs["token_page"] = self.page
context = super().get_context_data(**kwargs)
Expand All @@ -86,7 +113,12 @@ def get_token_context(self, context):
}

def encode_token(self, context):
    """Return the URL-quoted resumptionToken for the next page of results.

    The token starts from the data returned by ``get_token_context`` and
    is extended with the querystring parameters of the current request,
    so a consumer can resume the list by sending the token alone.

    Two kinds of querystring parameters are left out:

    * ``verb``, which is an exception as per the OAI spec, and
    * ``resumptionToken``, because re-encoding the incoming token would
      nest every token inside the next one, growing it on each page.

    Values from ``get_token_context`` take precedence over querystring
    parameters of the same name. ``dispatch`` injects the previous
    token's values into ``request.GET``; without this precedence a stale
    value from the previous token could overwrite the freshly computed
    one.

    :param context: the template context passed on to
        ``get_token_context``.
    :return: the token as a quoted, urlencoded string.
    """
    excluded = {"verb", "resumptionToken"}
    token_data = self.get_token_context(context)
    for key, value in self.request.GET.items():
        if key in excluded:
            continue
        # setdefault keeps any value already supplied by the token context.
        token_data.setdefault(key, value)

    return quote(urlencode(token_data))

def _decode_token(self):
if not self._decoded_token and "resumptionToken" in self.request.GET:
Expand All @@ -98,7 +130,6 @@ def _decode_token(self):

@property
def page(self):
    """Page number carried by the resumptionToken.

    Returns the token's ``page`` value as an int (1 when the token has no
    ``page`` entry), or None when there is no decoded token.
    """
    self._decode_token()
    token = self._decoded_token
    return int(token.get("page", 1)) if token else None
Expand Down Expand Up @@ -134,7 +165,7 @@ def get_token_context(self, context):
@property
def from_(self):
    """Value of the ``from`` filter for the current request.

    A ``from`` value encoded in the resumptionToken takes precedence;
    otherwise the ``from`` querystring parameter is returned (None when
    it is absent).
    """
    self._decode_token()
    # Membership is tested against the dict's keys. Testing against
    # `.items()` compares the string with (key, value) tuples and can
    # never match.
    if self._decoded_token and "from" in self._decoded_token:
        return self._decoded_token.get("from")
    return self.request.GET.get("from")
Expand Down
2 changes: 2 additions & 0 deletions src/api/oai/views.py
Expand Up @@ -48,6 +48,8 @@ def get_queryset(self):
queryset = submission_models.Article.objects.filter(
date_published__isnull=False,
)
if self.request.journal:
queryset = queryset.filter(journal=self.request.journal)
set_filter = self.request.GET.get('set')
issue_type_list = [issue_type.code for issue_type in journal_models.IssueType.objects.all()]

Expand Down
51 changes: 50 additions & 1 deletion src/api/tests/test_oai.py
Expand Up @@ -4,13 +4,15 @@
__license__ = "AGPL v3"
__maintainer__ = "Birkbeck Centre for Technology and Publishing"

from django.test import TestCase, override_settings
from urllib.parse import unquote_plus
from django.test import RequestFactory, TestCase, override_settings
from django.urls import reverse
from django.utils.http import urlencode

from freezegun import freeze_time
from lxml import etree

from api.oai.base import OAIPaginationMixin
from submission import models as sm_models
from utils.testing import helpers

Expand Down Expand Up @@ -168,6 +170,53 @@ def test_list_sets(self):

self.assertEqual(str(response.rendered_content).split(), expected.split())

@override_settings(URL_CONFIG="domain")
def test_oai_resumption_token_decode(self):
    """Params encoded in a resumptionToken show up in request.GET."""

    class TestView(OAIPaginationMixin):
        pass

    expected = {"custom-param": "custom-value"}
    query_params = {
        "verb": "ListRecords",
        "resumptionToken": urlencode(expected),
    }
    request = RequestFactory().get("/api/oai", query_params)
    # Running the view goes through dispatch(), which injects the params
    # decoded from the token into request.GET.
    TestView.as_view()(request)
    self.assertEqual(
        request.GET.get("custom-param"),
        expected["custom-param"],
        "Parameters not being encoded by resumptionToken correctly",
    )

@override_settings(URL_CONFIG="domain")
@freeze_time("1980-01-01")
def test_oai_resumption_token_encode(self):
    """Extra querystring params are carried inside the resumption token."""
    expected = {"custom-param": "custom-value"}
    expected_encoded = urlencode(expected)
    # Create 101 published articles.
    # NOTE(review): presumably this exceeds the records-per-page limit so
    # that a resumption token is rendered -- confirm against the page size.
    for _ in range(1, 102):
        helpers.create_submission(
            journal_id=self.journal.pk,
            stage=sm_models.STAGE_PUBLISHED,
            date_published="1986-07-12T17:00:00.000+0200",
            authors=[self.author],
        )

    path = reverse('OAI_list_records')
    query_params = dict(
        verb="ListRecords",
        metadataPrefix="jats",
        **expected,
    )
    query_string = urlencode(query_params)
    response = self.client.get(
        f'{path}?{query_string}',
        SERVER_NAME="testserver"
    )
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn(
        expected_encoded,
        unquote_plus(response.context["resumption_token"]),
        "Query parameter has not been encoded into resumption_token",
    )


LIST_RECORDS_DATA_DC = """
<?xml version="1.0" encoding="UTF-8"?>
Expand Down
16 changes: 16 additions & 0 deletions src/utils/http/__init__.py
@@ -0,0 +1,16 @@
from contextlib import ContextDecorator


class allow_mutating_GET(ContextDecorator):
    """CAUTION: Think twice before considering using this

    Context manager that flags ``request.GET`` as mutable for the duration
    of the ``with`` block and restores the previous flag on exit, including
    when the block raises.

    The previous flag is snapshotted on every entry rather than once at
    construction, so the same instance can be reused or nested and still
    restores the state that was in effect when each block was entered.

    :param request: an object whose ``GET`` attribute exposes a
        ``_mutable`` flag.
    """

    def __init__(self, request):
        self.request = request
        # State seen at construction time; kept as an attribute for
        # backwards compatibility.
        self._mutable = self.request.GET._mutable
        # One saved flag per active `with` block, innermost last.
        self._saved_states = []

    def __enter__(self):
        self._saved_states.append(self.request.GET._mutable)
        self.request.GET._mutable = True
        return self

    def __exit__(self, *exc):
        self.request.GET._mutable = self._saved_states.pop()
        # Never swallow an exception raised inside the block.
        return False