Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a warning when items_per_page is higher than max_items_per_page #273

Merged
merged 5 commits into from Apr 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 13 additions & 1 deletion eodag/api/core.py
Expand Up @@ -659,7 +659,7 @@ def search_iter_page(
while True:
if iteration > 1 and next_page_url:
pagination_config["next_page_url_tpl"] = next_page_url
logger.debug("Iterate over pages: search page %s", iteration)
logger.info("Iterate search over multiple pages: page #%s", iteration)
try:
products, _ = self._do_search(
search_plugin, count=False, raise_errors=True, **search_kwargs
Expand Down Expand Up @@ -993,6 +993,18 @@ def _do_search(self, search_plugin, count=True, raise_errors=False, **kwargs):

.. versionadded:: 1.0
"""
max_items_per_page = search_plugin.config.pagination.get("max_items_per_page")
if max_items_per_page and kwargs["items_per_page"] > max_items_per_page:
logger.warning(
"EODAG believes that you might have asked for more products/items "
"than the maximum allowed by '%s': %s > %s. Try to lower "
"the value of 'items_per_page' and get the next page (e.g. 'page=2'), "
"or directly use the 'search_all' method.",
search_plugin.provider,
kwargs["items_per_page"],
max_items_per_page,
)

results = SearchResult([])
total_results = 0
try:
Expand Down
30 changes: 22 additions & 8 deletions eodag/resources/providers.yml
Expand Up @@ -65,6 +65,10 @@
pagination:
next_page_query_obj: '{{"limit":{items_per_page},"page":{page}}}'
total_items_nb_key_path: '$.meta.found'
# 2021/04/28: aws_eos doesn't specify a limit in its docs. It says that the default
# value is 500 (https://doc.eos.com/search.api/#single-dataset-search).
# Let's set it to this value for now
max_items_per_page: 500
query_params_key: 'search'
discover_metadata:
auto_discovery: true
Expand Down Expand Up @@ -510,7 +514,7 @@
pagination:
next_page_url_tpl: '{url}?{search}&maxRecords={items_per_page}&page={page}'
total_items_nb_key_path: '$.properties.totalResults'
# 2019/03/19: Returns a 400 error code if greater than 500.
# 2021/03/19: Returns a 400 error code if greater than 500.
max_items_per_page: 500
discover_metadata:
auto_discovery: true
Expand Down Expand Up @@ -684,7 +688,7 @@
pagination:
next_page_url_tpl: '{url}?{search}&maxRecords={items_per_page}&page={page}'
total_items_nb_key_path: '$.properties.totalResults'
# 2019/03/19: 500 is the max, no error if greater
# 2021/03/19: 500 is the max, no error if greater
max_items_per_page: 500
discover_metadata:
auto_discovery: true
Expand Down Expand Up @@ -876,7 +880,7 @@
pagination:
next_page_url_tpl: '{url}?{search}&size={items_per_page}&from={skip}'
total_items_nb_key_path: '$.totalnb'
# 2019/03/19: It's not possible to get more than 10_000 products
# 2021/03/19: It's not possible to get more than 10_000 products
# Returns a 400 Bad Request if more products are requested.
max_items_per_page: 10_000
results_entry: 'hits'
Expand Down Expand Up @@ -966,7 +970,7 @@
pagination:
next_page_url_tpl: '{url}?{search}&maxRecords={items_per_page}&page={page}'
total_items_nb_key_path: '$.properties.totalResults'
# 2019/03/19: 2000 is the max, 400 error if greater
# 2021/03/19: 2000 is the max, 400 error if greater
max_items_per_page: 2_000
discover_metadata:
auto_discovery: true
Expand Down Expand Up @@ -1121,7 +1125,7 @@
pagination:
next_page_url_tpl: '{url}?{search}&maxRecords={items_per_page}&startIndex={skip_base_1}'
total_items_nb_key_path: '//os:totalResults/text()'
# 2019/03/19: 50 is the max, no error if greater
# 2021/03/19: 50 is the max, no error if greater
max_items_per_page: 50
discover_metadata:
auto_discovery: true
Expand Down Expand Up @@ -1345,7 +1349,7 @@
pagination:
count_endpoint: 'https://catalogue.onda-dias.eu/dias-catalogue/Products/$count'
next_page_url_tpl: '{url}?{search}&$top={items_per_page}&$skip={skip}'
# 2019/03/19: 2000 is the max, if greater 200 response but contains an error message
# 2021/03/19: 2000 is the max, if greater 200 response but contains an error message
max_items_per_page: 2_000
results_entry: 'value'
literal_search_params:
Expand Down Expand Up @@ -1483,7 +1487,7 @@
type: StacSearch
api_endpoint: https://eod-catalog-svc-prod.astraea.earth/search
pagination:
# 2019/03/19: The docs (https://eod-catalog-svc-prod.astraea.earth/api.html#operation/getSearchSTAC)
# 2021/03/19: The docs (https://eod-catalog-svc-prod.astraea.earth/api.html#operation/getSearchSTAC)
# say the max is 10_000. In practice 1_000 products are returned if more are asked (even greater
# than 10_000), without any error.
# This provider doesn't implement any pagination, let's just try to get the maximum number of
Expand Down Expand Up @@ -1570,7 +1574,7 @@
api_endpoint: https://landsatlook.usgs.gov/sat-api/collections/{collection}/items
pagination:
total_items_nb_key_path: '$.meta.found'
# 2019/03/19: no more than 10_000 (if greater, returns a 500 error code)
# 2021/03/19: no more than 10_000 (if greater, returns a 500 error code)
# but in practive if an Internal Server Error is returned for more than
# about 500 products.
max_items_per_page: 500
Expand Down Expand Up @@ -1618,6 +1622,11 @@
search: !plugin
type: StacSearch
api_endpoint: https://earth-search.aws.element84.com/v0/search
pagination:
# 2021/04/28: Earth-Search relies on Sat-API whose docs (http://sat-utils.github.io/sat-api/#search-stac-items-by-simple-filtering-)
# say the max is 10_000. In practice a too high number (e.g. 5_000) returns a 502 error ({"message": "Internal server error"}).
# Let's set it to a more robust number: 500
max_items_per_page: 500
metadata_mapping:
id:
- 'ids=["{id}"]'
Expand Down Expand Up @@ -1684,6 +1693,11 @@
search: !plugin
type: StacSearch
api_endpoint: https://earth-search.aws.element84.com/v0/search
pagination:
# 2021/04/28: Earth-Search relies on Sat-API whose docs (http://sat-utils.github.io/sat-api/#search-stac-items-by-simple-filtering-)
# say the max is 10_000. In practice a too high number (e.g. 5_000) returns a 502 error ({"message": "Internal server error"}).
# Let's set it to a more robust number: 500
max_items_per_page: 500
metadata_mapping:
id:
- 'ids=["{id}"]'
Expand Down
92 changes: 81 additions & 11 deletions tests/units/test_core.py
Expand Up @@ -717,37 +717,75 @@ def test__prepare_search_peps_plugins_product_not_available(self):
finally:
self.dag.set_preferred_provider(prev_fav_provider)

@mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True)
def test__do_search_support_itemsperpage_higher_than_maximum(self, search_plugin):
"""_do_search must create a count query by default"""
search_plugin.provider = "peps"
search_plugin.query.return_value = (
self.search_results.data, # a list must be returned by .query
self.search_results_size,
)

class DummyConfig:
pagination = {"max_items_per_page": 1}

search_plugin.config = DummyConfig()
sr, estimate = self.dag._do_search(
search_plugin=search_plugin,
items_per_page=2,
)
self.assertIsInstance(sr, SearchResult)
self.assertEqual(len(sr), self.search_results_size)
self.assertEqual(estimate, self.search_results_size)

@mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True)
def test__do_search_counts_by_default(self, search_plugin):
"""__do_search must create a count query by default"""
"""_do_search must create a count query by default"""
search_plugin.provider = "peps"
search_plugin.query.return_value = (
self.search_results.data, # a list must be returned by .query
self.search_results_size,
)

class DummyConfig:
pagination = {}

search_plugin.config = DummyConfig()
sr, estimate = self.dag._do_search(search_plugin=search_plugin)
self.assertIsInstance(sr, SearchResult)
self.assertEqual(len(sr), self.search_results_size)
self.assertEqual(estimate, self.search_results_size)

@mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True)
def test__do_search_without_count(self, search_plugin):
"""__do_search must be able to create a query without a count"""
"""_do_search must be able to create a query without a count"""
search_plugin.provider = "peps"
search_plugin.query.return_value = (
self.search_results.data,
None, # .query must return None if count is False
)

class DummyConfig:
pagination = {}

search_plugin.config = DummyConfig()

sr, estimate = self.dag._do_search(search_plugin=search_plugin, count=False)
self.assertIsNone(estimate)
self.assertEqual(len(sr), self.search_results_size)

@mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True)
def test__do_search_paginated_handle_no_count_returned(self, search_plugin):
"""__do_search must provide a best estimate when a provider doesn't return a count""" # noqa
"""_do_search must provide a best estimate when a provider doesn't return a count""" # noqa
search_plugin.provider = "peps"
# If the provider doesn't return a count, .query returns 0
search_plugin.query.return_value = (self.search_results.data, 0)

class DummyConfig:
pagination = {}

search_plugin.config = DummyConfig()

page = 4
sr, estimate = self.dag._do_search(
search_plugin=search_plugin,
Expand All @@ -760,12 +798,18 @@ def test__do_search_paginated_handle_no_count_returned(self, search_plugin):

@mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True)
def test__do_search_paginated_handle_fuzzy_count(self, search_plugin):
"""__do_search must provide a best estimate when a provider returns a fuzzy count""" # noqa
"""_do_search must provide a best estimate when a provider returns a fuzzy count""" # noqa
search_plugin.provider = "peps"
search_plugin.query.return_value = (
self.search_results.data * 4, # 8 products returned
22, # fuzzy number, less than the real total count
)

class DummyConfig:
pagination = {}

search_plugin.config = DummyConfig()

page = 4
items_per_page = 10
sr, estimate = self.dag._do_search(
Expand All @@ -784,13 +828,19 @@ def test__do_search_paginated_handle_fuzzy_count(self, search_plugin):

@mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True)
def test__do_search_paginated_handle_null_count(self, search_plugin):
"""__do_search must provide a best estimate when a provider returns a null count""" # noqa
"""_do_search must provide a best estimate when a provider returns a null count""" # noqa
# TODO: check the underlying implementation, it doesn't make so much sense since
# this case is already covered with nb_res = len(res) * page. This one uses
# nb_res = items_per_page * (page - 1) whick actually makes more sense. Choose
# one of them.
search_plugin.provider = "peps"
search_plugin.query.return_value = ([], 0)

class DummyConfig:
pagination = {}

search_plugin.config = DummyConfig()

page = 4
items_per_page = 10
sr, estimate = self.dag._do_search(
Expand All @@ -803,18 +853,22 @@ def test__do_search_paginated_handle_null_count(self, search_plugin):
self.assertEqual(estimate, expected_estimate)

def test__do_search_does_not_raise_by_default(self):
"""__do_search must not raise any error by default"""
"""_do_search must not raise any error by default"""
# provider attribute required internally by __do_search for logging purposes.
class DummyConfig:
pagination = {}

class DummySearchPlugin:
provider = "peps"
config = DummyConfig()

sr, estimate = self.dag._do_search(search_plugin=DummySearchPlugin())
self.assertIsInstance(sr, SearchResult)
self.assertEqual(len(sr), 0)
self.assertEqual(estimate, 0)

def test__do_search_can_raise_errors(self):
"""__do_search must not raise any error by default"""
"""_do_search must not raise errors if raise_errors=True"""

class DummySearchPlugin:
provider = "peps"
Expand All @@ -825,23 +879,35 @@ class DummySearchPlugin:

@mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True)
def test__do_search_query_products_must_be_a_list(self, search_plugin):
"""__do_search expects that each search plugin returns a list of products."""
"""_do_search expects that each search plugin returns a list of products."""
search_plugin.provider = "peps"
search_plugin.query.return_value = (
self.search_results,
self.search_results, # This is not a list but a SearchResult
self.search_results_size,
)

class DummyConfig:
pagination = {}

search_plugin.config = DummyConfig()

with self.assertRaises(PluginImplementationError):
self.dag._do_search(search_plugin=search_plugin, raise_errors=True)

@mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True)
def test__do_search_register_downloader_if_search_intersection(self, search_plugin):
"""__do_search must register each product's downloader if search_intersection is not None""" # noqa
"""_do_search must register each product's downloader if search_intersection is not None""" # noqa
search_plugin.provider = "peps"
search_plugin.query.return_value = (
self.search_results.data,
self.search_results_size,
)

class DummyConfig:
pagination = {}

search_plugin.config = DummyConfig()

sr, _ = self.dag._do_search(search_plugin=search_plugin)
for product in sr:
self.assertIsNotNone(product.downloader)
Expand All @@ -850,11 +916,15 @@ def test__do_search_register_downloader_if_search_intersection(self, search_plug
def test__do_search_doest_not_register_downloader_if_no_search_intersection(
self, search_plugin
):
"""__do_search must not register downloaders if search_intersection is None"""
"""_do_search must not register downloaders if search_intersection is None"""

class DummyProduct:
seach_intersecion = None

class DummyConfig:
pagination = {}

search_plugin.config = DummyConfig()
search_plugin.provider = "peps"
search_plugin.query.return_value = ([DummyProduct(), DummyProduct()], 2)
sr, _ = self.dag._do_search(search_plugin=search_plugin)
Expand Down