Skip to content

Commit

Permalink
Check pagination links->next with validator (#266)
Browse files Browse the repository at this point in the history
* Check pagination links->next with validator
* Separate docker config file for CI
* Raise response errors if keys are missing in pagination response
* Robustify absolute URL check with urllib
* Try using simplejson and fallback to json in validator
  • Loading branch information
ml-evs committed May 6, 2020
1 parent 2eddcb7 commit b5cc923
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 21 deletions.
36 changes: 36 additions & 0 deletions .ci/docker_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"debug": true,
"default_db": "test_server",
"base_url": "http://gh_actions_host:3213",
"implementation": {
"name": "Example implementation",
"source_url": "https://github.com/Materials-Consortia/optimade-python-tools",
"maintainer": {"email": "test@test.org"}
},
"provider": {
"name": "Example provider",
"description": "Provider used for examples, not to be assigned to a real database",
"prefix": "exmpl",
"homepage": "https://example.com",
"index_base_url": "http://gh_actions_host:3214"
},
"provider_fields": {
"structures": [
"band_gap",
"chemsys"
]
},
"aliases": {
"structures": {
"id": "task_id",
"chemical_formula_descriptive": "pretty_formula",
"chemical_formula_reduced": "pretty_formula",
"chemical_formula_anonymous": "formula_anonymous"
}
},
"length_aliases": {
"structures": {
"chemsys": "nelements"
}
}
}
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ services:
context: .
dockerfile: Dockerfile
args:
CONFIG_FILE: tests/test_config.json
CONFIG_FILE: .ci/docker_config.json
PORT: 5000
environment:
MAIN: main
Expand All @@ -21,7 +21,7 @@ services:
context: .
dockerfile: Dockerfile
args:
CONFIG_FILE: tests/test_config.json
CONFIG_FILE: .ci/docker_config.json
PORT: 5001
environment:
MAIN: main_index
Expand Down
92 changes: 73 additions & 19 deletions optimade/validator/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
import requests
import sys
import logging
import json
import urllib.parse

try:
import simplejson as json
except ImportError:
import json

from typing import Union

Expand Down Expand Up @@ -93,13 +98,15 @@ def __init__(self, base_url: str, max_retries=5):
Note: A maximum of one slash ("/") is allowed as the last character.
"""
self.base_url = base_url[:-1] if base_url.endswith("/") else base_url
self.base_url = base_url
self.last_request = None
self.response = None
self.max_retries = max_retries

def get(self, request: str):
""" Makes the given request, with a number of retries if being rate limited.
""" Makes the given request, with a number of retries if being rate limited. The
request will be prepended with the `base_url` unless the request appears to be an
absolute URL (i.e. starts with `http://` or `https://`).
Parameters:
request (str): the request to make against the base URL of this client.
Expand All @@ -113,10 +120,12 @@ def get(self, request: str):
the `MAX_RETRIES` attempts.
"""
if request:
self.last_request = f"{self.base_url}{request}".replace("\n", "")
if urllib.parse.urlparse(request, allow_fragments=True).scheme:
self.last_request = request
else:
self.last_request = self.base_url
if not request.startswith("/"):
request = f"/{request}"
self.last_request = f"{self.base_url}{request}"

status_code = None
retries = 0
Expand Down Expand Up @@ -298,6 +307,15 @@ def __init__( # pylint: disable=too-many-arguments
RESPONSE_CLASSES_INDEX if self.index else RESPONSE_CLASSES
)

# some simple checks on base_url
base_url = urllib.parse.urlparse(self.base_url)
if base_url.query or any(
endp in base_url.path for endp in self.expected_entry_endpoints
):
raise SystemExit(
"Base URL not appropriate: should not contain an endpoint or filter."
)

# if valid is True on exit, script returns 0 to shell
# if valid is False on exit, script returns 1 to shell
# if valid is None on exit, script returns 2 to shell, indicating an internal failure
Expand Down Expand Up @@ -339,14 +357,6 @@ def main(self):
self.valid = not bool(self.failure_count)
return

# some simple checks on base_url
if "?" in self.base_url or any(
[self.base_url.endswith(endp) for endp in self.expected_entry_endpoints]
):
sys.exit(
"Base URL not appropriate: should not contain an endpoint or filter."
)

# test entire implementation
print(f"Testing entire implementation at {self.base_url}...")
print("\nMandatory tests:")
Expand Down Expand Up @@ -460,16 +470,58 @@ def test_as_type(self):
self.deserialize_response(response, self.as_type_cls)

@test_case
def test_page_limit(self, response):
""" Test that a multi-entry endpoint obeys the page limit. """
def test_page_limit(self, response, check_next_link=True):
""" Test that a multi-entry endpoint obeys the page limit.
Parameters:
response (requests.Response): the response to test for page limit
compliance.
Keyword arguments:
check_next (bool): whether or not to recursively follow and test any
pagination links provided under `links->next`.
Raises:
ResponseError: if test fails in a predictable way.
Returns:
bool, str: True if the test was successful, with a string describing
the success.
"""
try:
num_entries = len(response.json()["data"])
except AttributeError:
response = response.json()
except (AttributeError, json.JSONDecodeError):
raise ResponseError("Unable to test endpoint page limit.")

try:
num_entries = len(response["data"])
except (KeyError, TypeError):
raise ResponseError(
"Response under `data` field was missing or had wrong type."
)

if num_entries > self.page_limit:
raise ResponseError(
f"Endpoint did not obey page limit: {num_entries} entries vs {self.page_limit} limit"
)

try:
more_data_available = response["meta"]["more_data_available"]
except KeyError:
raise ResponseError("Field `meta->more_data_available` was missing.")

if more_data_available:
try:
next_link = response["links"]["next"]
except KeyError:
raise ResponseError(
"Endpoint suggested more data was available but provided no links->next link."
)

next_response = self.get_endpoint(next_link)
self.test_page_limit(next_response, check_next_link=False)

return (
True,
f"Endpoint obeyed page limit of {self.page_limit} by returning {num_entries} entries.",
Expand Down Expand Up @@ -561,8 +613,10 @@ def get_available_endpoints(self, base_info):
@test_case
def get_endpoint(self, request_str, optional=False):
""" Gets the response from the endpoint specified by `request_str`. """
request_str = f"/{request_str}".replace("\n", "")

request_str = request_str.replace("\n", "")
response = self.client.get(request_str)

if response.status_code != 200:
error_titles = [
error.get("title", "") for error in response.json().get("errors", [])
Expand Down

0 comments on commit b5cc923

Please sign in to comment.