Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check pagination links->next with validator #266

Merged
merged 7 commits into from
May 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 36 additions & 0 deletions .ci/docker_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"debug": true,
"default_db": "test_server",
"base_url": "http://gh_actions_host:3213",
"implementation": {
"name": "Example implementation",
"source_url": "https://github.com/Materials-Consortia/optimade-python-tools",
"maintainer": {"email": "test@test.org"}
},
"provider": {
"name": "Example provider",
"description": "Provider used for examples, not to be assigned to a real database",
"prefix": "exmpl",
"homepage": "https://example.com",
"index_base_url": "http://gh_actions_host:3214"
},
"provider_fields": {
"structures": [
"band_gap",
"chemsys"
]
},
"aliases": {
"structures": {
"id": "task_id",
"chemical_formula_descriptive": "pretty_formula",
"chemical_formula_reduced": "pretty_formula",
"chemical_formula_anonymous": "formula_anonymous"
}
},
"length_aliases": {
"structures": {
"chemsys": "nelements"
}
}
}
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ services:
context: .
dockerfile: Dockerfile
args:
CONFIG_FILE: tests/test_config.json
CONFIG_FILE: .ci/docker_config.json
PORT: 5000
environment:
MAIN: main
Expand All @@ -21,7 +21,7 @@ services:
context: .
dockerfile: Dockerfile
args:
CONFIG_FILE: tests/test_config.json
CONFIG_FILE: .ci/docker_config.json
PORT: 5001
environment:
MAIN: main_index
Expand Down
92 changes: 73 additions & 19 deletions optimade/validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
import requests
import sys
import logging
import json
import urllib.parse

try:
import simplejson as json
except ImportError:
import json

from typing import Union

Expand Down Expand Up @@ -93,13 +98,15 @@ def __init__(self, base_url: str, max_retries=5):
Note: A maximum of one slash ("/") is allowed as the last character.

"""
self.base_url = base_url[:-1] if base_url.endswith("/") else base_url
self.base_url = base_url
self.last_request = None
self.response = None
self.max_retries = max_retries

def get(self, request: str):
""" Makes the given request, with a number of retries if being rate limited.
""" Makes the given request, with a number of retries if being rate limited. The
request will be prepended with the `base_url` unless the request appears to be an
absolute URL (i.e. starts with `http://` or `https://`).

Parameters:
request (str): the request to make against the base URL of this client.
Expand All @@ -113,10 +120,12 @@ def get(self, request: str):
the `MAX_RETRIES` attempts.

"""
if request:
self.last_request = f"{self.base_url}{request}".replace("\n", "")
if urllib.parse.urlparse(request, allow_fragments=True).scheme:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you still want to make sure the scheme is part of ("http", "https") or do we not care? If yes, here's a quick and dirty suggestion:

Suggested change
if urllib.parse.urlparse(request, allow_fragments=True).scheme:
request_scheme = urllib.parse.urlparse(request, allow_fragments=True).scheme
if request_scheme and request_scheme in ("http", "https"):

Otherwise, fine 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to leave it as is, on the off-chance that an implementation provides a different scheme (presumably accidentally) then it will be obvious from the error spat out. Your suggested change would lead to the next request being e.g. http://base_url.com/ssh://base_url.com?page_offset=5, so I think it's clearer as is.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. Then the docstring should probably have been updated, but fine.
Again, I just want to be really nitpicky with everything in the validator since (in my opinion) it should be usable for every and any implementation. I.e., it should be really specific in what it wants, while at the same time being very broad in scope.. not easy :)

self.last_request = request
else:
self.last_request = self.base_url
if not request.startswith("/"):
request = f"/{request}"
self.last_request = f"{self.base_url}{request}"

status_code = None
retries = 0
Expand Down Expand Up @@ -298,6 +307,15 @@ def __init__( # pylint: disable=too-many-arguments
RESPONSE_CLASSES_INDEX if self.index else RESPONSE_CLASSES
)

# some simple checks on base_url
base_url = urllib.parse.urlparse(self.base_url)
if base_url.query or any(
endp in base_url.path for endp in self.expected_entry_endpoints
):
raise SystemExit(
"Base URL not appropriate: should not contain an endpoint or filter."
)

# if valid is True on exit, script returns 0 to shell
# if valid is False on exit, script returns 1 to shell
# if valid is None on exit, script returns 2 to shell, indicating an internal failure
Expand Down Expand Up @@ -339,14 +357,6 @@ def main(self):
self.valid = not bool(self.failure_count)
return

# some simple checks on base_url
if "?" in self.base_url or any(
[self.base_url.endswith(endp) for endp in self.expected_entry_endpoints]
):
sys.exit(
"Base URL not appropriate: should not contain an endpoint or filter."
)

# test entire implementation
print(f"Testing entire implementation at {self.base_url}...")
print("\nMandatory tests:")
Expand Down Expand Up @@ -460,16 +470,58 @@ def test_as_type(self):
self.deserialize_response(response, self.as_type_cls)

@test_case
def test_page_limit(self, response):
""" Test that a multi-entry endpoint obeys the page limit. """
def test_page_limit(self, response, check_next_link=True):
""" Test that a multi-entry endpoint obeys the page limit.

Parameters:
response (requests.Response): the response to test for page limit
compliance.

Keyword arguments:
check_next (bool): whether or not to recursively follow and test any
pagination links provided under `links->next`.

Raises:
ResponseError: if test fails in a predictable way.

Returns:
bool, str: True if the test was successful, with a string describing
the success.

"""
try:
num_entries = len(response.json()["data"])
except AttributeError:
response = response.json()
except (AttributeError, json.JSONDecodeError):
raise ResponseError("Unable to test endpoint page limit.")

try:
num_entries = len(response["data"])
except (KeyError, TypeError):
raise ResponseError(
"Response under `data` field was missing or had wrong type."
)

if num_entries > self.page_limit:
raise ResponseError(
f"Endpoint did not obey page limit: {num_entries} entries vs {self.page_limit} limit"
)

try:
ml-evs marked this conversation as resolved.
Show resolved Hide resolved
more_data_available = response["meta"]["more_data_available"]
except KeyError:
raise ResponseError("Field `meta->more_data_available` was missing.")

if more_data_available:
try:
next_link = response["links"]["next"]
except KeyError:
raise ResponseError(
"Endpoint suggested more data was available but provided no links->next link."
)

next_response = self.get_endpoint(next_link)
self.test_page_limit(next_response, check_next_link=False)

return (
True,
f"Endpoint obeyed page limit of {self.page_limit} by returning {num_entries} entries.",
Expand Down Expand Up @@ -561,8 +613,10 @@ def get_available_endpoints(self, base_info):
@test_case
def get_endpoint(self, request_str, optional=False):
""" Gets the response from the endpoint specified by `request_str`. """
request_str = f"/{request_str}".replace("\n", "")

request_str = request_str.replace("\n", "")
ml-evs marked this conversation as resolved.
Show resolved Hide resolved
response = self.client.get(request_str)

if response.status_code != 200:
error_titles = [
error.get("title", "") for error in response.json().get("errors", [])
Expand Down