From 3c2c6906d2a932346a1378d7f151c9b90f7aef18 Mon Sep 17 00:00:00 2001 From: Maxime Liquet Date: Wed, 28 Apr 2021 13:42:10 +0200 Subject: [PATCH 1/5] fix: log warning when items_per_page > max_items_per_page --- eodag/api/core.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/eodag/api/core.py b/eodag/api/core.py index f46a3675f..ae2ea9cd2 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -993,6 +993,18 @@ def _do_search(self, search_plugin, count=True, raise_errors=False, **kwargs): .. versionadded:: 1.0 """ + max_items_per_page = search_plugin.config.pagination.get("max_items_per_page") + if max_items_per_page and kwargs["items_per_page"] > max_items_per_page: + logger.warning( + "EODAG believes that you might have asked for more products/items " + "than the maximum allowed by '%s': %s > %s. Try to lower " + "the value of 'items_per_page' and get the next page (e.g. 'page=2'), " + "or directly use the 'search_all' method.", + search_plugin.provider, + kwargs["items_per_page"], + max_items_per_page, + ) + results = SearchResult([]) total_results = 0 try: From 90981adcbbad20048b3756b0f4bc435b650aa3c3 Mon Sep 17 00:00:00 2001 From: Maxime Liquet Date: Wed, 28 Apr 2021 13:43:09 +0200 Subject: [PATCH 2/5] fix: increase log level for search_all at each iteration --- eodag/api/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eodag/api/core.py b/eodag/api/core.py index ae2ea9cd2..dd5648fd0 100644 --- a/eodag/api/core.py +++ b/eodag/api/core.py @@ -659,7 +659,7 @@ def search_iter_page( while True: if iteration > 1 and next_page_url: pagination_config["next_page_url_tpl"] = next_page_url - logger.debug("Iterate over pages: search page %s", iteration) + logger.info("Iterate search over multiple pages: page #%s", iteration) try: products, _ = self._do_search( search_plugin, count=False, raise_errors=True, **search_kwargs From 2537400fcd940131074decaa4664e10ea7944224 Mon Sep 17 00:00:00 2001 From: Maxime Liquet Date: Wed, 28 Apr 2021 13:44:17 +0200 Subject: [PATCH 3/5] fix: add max_items_per_page for some providers --- eodag/resources/providers.yml | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/eodag/resources/providers.yml b/eodag/resources/providers.yml index c1ba98b93..3d5bc8279 100644 --- a/eodag/resources/providers.yml +++ b/eodag/resources/providers.yml @@ -65,6 +65,10 @@ pagination: next_page_query_obj: '{{"limit":{items_per_page},"page":{page}}}' total_items_nb_key_path: '$.meta.found' + # 2021/04/28: aws_eos doesn't specify a limit in its docs. It says that the default + # value is 500 (https://doc.eos.com/search.api/#single-dataset-search). + # Let's set it to this value for now + max_items_per_page: 500 query_params_key: 'search' discover_metadata: auto_discovery: true @@ -510,7 +514,7 @@ pagination: next_page_url_tpl: '{url}?{search}&maxRecords={items_per_page}&page={page}' total_items_nb_key_path: '$.properties.totalResults' - # 2019/03/19: Returns a 400 error code if greater than 500. + # 2021/03/19: Returns a 400 error code if greater than 500. max_items_per_page: 500 discover_metadata: auto_discovery: true @@ -684,7 +688,7 @@ pagination: next_page_url_tpl: '{url}?{search}&maxRecords={items_per_page}&page={page}' total_items_nb_key_path: '$.properties.totalResults' - # 2019/03/19: 500 is the max, no error if greater + # 2021/03/19: 500 is the max, no error if greater max_items_per_page: 500 discover_metadata: auto_discovery: true @@ -876,7 +880,7 @@ pagination: next_page_url_tpl: '{url}?{search}&size={items_per_page}&from={skip}' total_items_nb_key_path: '$.totalnb' - # 2019/03/19: It's not possible to get more than 10_000 products + # 2021/03/19: It's not possible to get more than 10_000 products # Returns a 400 Bad Request if more products are requested. max_items_per_page: 10_000 results_entry: 'hits' @@ -966,7 +970,7 @@ pagination: next_page_url_tpl: '{url}?{search}&maxRecords={items_per_page}&page={page}' total_items_nb_key_path: '$.properties.totalResults' - # 2019/03/19: 2000 is the max, 400 error if greater + # 2021/03/19: 2000 is the max, 400 error if greater max_items_per_page: 2_000 discover_metadata: auto_discovery: true @@ -1121,7 +1125,7 @@ pagination: next_page_url_tpl: '{url}?{search}&maxRecords={items_per_page}&startIndex={skip_base_1}' total_items_nb_key_path: '//os:totalResults/text()' - # 2019/03/19: 50 is the max, no error if greater + # 2021/03/19: 50 is the max, no error if greater max_items_per_page: 50 discover_metadata: auto_discovery: true @@ -1345,7 +1349,7 @@ pagination: count_endpoint: 'https://catalogue.onda-dias.eu/dias-catalogue/Products/$count' next_page_url_tpl: '{url}?{search}&$top={items_per_page}&$skip={skip}' - # 2019/03/19: 2000 is the max, if greater 200 response but contains an error message + # 2021/03/19: 2000 is the max, if greater 200 response but contains an error message max_items_per_page: 2_000 results_entry: 'value' literal_search_params: @@ -1483,7 +1487,7 @@ type: StacSearch api_endpoint: https://eod-catalog-svc-prod.astraea.earth/search pagination: - # 2019/03/19: The docs (https://eod-catalog-svc-prod.astraea.earth/api.html#operation/getSearchSTAC) + # 2021/03/19: The docs (https://eod-catalog-svc-prod.astraea.earth/api.html#operation/getSearchSTAC) # say the max is 10_000. In practice 1_000 products are returned if more are asked (even greater # than 10_000), without any error. # This provider doesn't implement any pagination, let's just try to get the maximum number of @@ -1570,7 +1574,7 @@ api_endpoint: https://landsatlook.usgs.gov/sat-api/collections/{collection}/items pagination: total_items_nb_key_path: '$.meta.found' - # 2019/03/19: no more than 10_000 (if greater, returns a 500 error code) + # 2021/03/19: no more than 10_000 (if greater, returns a 500 error code) # but in practive if an Internal Server Error is returned for more than # about 500 products. max_items_per_page: 500 @@ -1618,6 +1622,11 @@ search: !plugin type: StacSearch api_endpoint: https://earth-search.aws.element84.com/v0/search + pagination: + # 2021/04/28: Earth-Search relies on Sat-API whose docs (http://sat-utils.github.io/sat-api/#search-stac-items-by-simple-filtering-) + # say the max is 10_000. In practice a too high number (e.g. 5_000) returns a 502 error ({"message": "Internal server error"}). + # Let's set it to a more robust number: 500 + max_items_per_page: 500 metadata_mapping: id: - 'ids=["{id}"]' @@ -1684,6 +1693,11 @@ search: !plugin type: StacSearch api_endpoint: https://earth-search.aws.element84.com/v0/search + pagination: + # 2021/04/28: Earth-Search relies on Sat-API whose docs (http://sat-utils.github.io/sat-api/#search-stac-items-by-simple-filtering-) + # say the max is 10_000. In practice a too high number (e.g. 5_000) returns a 502 error ({"message": "Internal server error"}). + # Let's set it to a more robust number: 500 + max_items_per_page: 500 metadata_mapping: id: - 'ids=["{id}"]' From 486f9cd36add9fdce4fe18e9b9fd4c47c79d6c94 Mon Sep 17 00:00:00 2001 From: Maxime Liquet Date: Wed, 28 Apr 2021 14:15:50 +0200 Subject: [PATCH 4/5] fix: existing tests --- tests/units/test_core.py | 71 +++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 11 deletions(-) diff --git a/tests/units/test_core.py b/tests/units/test_core.py index 123e20e51..8715d59a6 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -719,12 +719,17 @@ def test__prepare_search_peps_plugins_product_not_available(self): @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) def test__do_search_counts_by_default(self, search_plugin): - """__do_search must create a count query by default""" + """_do_search must create a count query by default""" search_plugin.provider = "peps" search_plugin.query.return_value = ( self.search_results.data, # a list must be returned by .query self.search_results_size, ) + + class DummyConfig: + pagination = {} + + search_plugin.config = DummyConfig() sr, estimate = self.dag._do_search(search_plugin=search_plugin) self.assertIsInstance(sr, SearchResult) self.assertEqual(len(sr), self.search_results_size) @@ -732,22 +737,34 @@ def test__do_search_counts_by_default(self, search_plugin): @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) def test__do_search_without_count(self, search_plugin): - """__do_search must be able to create a query without a count""" + """_do_search must be able to create a query without a count""" search_plugin.provider = "peps" search_plugin.query.return_value = ( self.search_results.data, None, # .query must return None if count is False ) + + class DummyConfig: + pagination = {} + + search_plugin.config = DummyConfig() + sr, estimate = self.dag._do_search(search_plugin=search_plugin, count=False) self.assertIsNone(estimate) self.assertEqual(len(sr), self.search_results_size) @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) def test__do_search_paginated_handle_no_count_returned(self, search_plugin): - """__do_search must provide a best estimate when a provider doesn't return a count""" # noqa + """_do_search must provide a best estimate when a provider doesn't return a count""" # noqa search_plugin.provider = "peps" # If the provider doesn't return a count, .query returns 0 search_plugin.query.return_value = (self.search_results.data, 0) + + class DummyConfig: + pagination = {} + + search_plugin.config = DummyConfig() + page = 4 sr, estimate = self.dag._do_search( search_plugin=search_plugin, @@ -760,12 +777,18 @@ def test__do_search_paginated_handle_no_count_returned(self, search_plugin): @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) def test__do_search_paginated_handle_fuzzy_count(self, search_plugin): - """__do_search must provide a best estimate when a provider returns a fuzzy count""" # noqa + """_do_search must provide a best estimate when a provider returns a fuzzy count""" # noqa search_plugin.provider = "peps" search_plugin.query.return_value = ( self.search_results.data * 4, # 8 products returned 22, # fuzzy number, less than the real total count ) + + class DummyConfig: + pagination = {} + + search_plugin.config = DummyConfig() + page = 4 items_per_page = 10 sr, estimate = self.dag._do_search( @@ -784,13 +807,19 @@ def test__do_search_paginated_handle_fuzzy_count(self, search_plugin): @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) def test__do_search_paginated_handle_null_count(self, search_plugin): - """__do_search must provide a best estimate when a provider returns a null count""" # noqa + """_do_search must provide a best estimate when a provider returns a null count""" # noqa # TODO: check the underlying implementation, it doesn't make so much sense since # this case is already covered with nb_res = len(res) * page. This one uses # nb_res = items_per_page * (page - 1) whick actually makes more sense. Choose # one of them. search_plugin.provider = "peps" search_plugin.query.return_value = ([], 0) + + class DummyConfig: + pagination = {} + + search_plugin.config = DummyConfig() + page = 4 items_per_page = 10 sr, estimate = self.dag._do_search( @@ -803,10 +832,14 @@ def test__do_search_paginated_handle_null_count(self, search_plugin): self.assertEqual(estimate, expected_estimate) def test__do_search_does_not_raise_by_default(self): - """__do_search must not raise any error by default""" + """_do_search must not raise any error by default""" # provider attribute required internally by __do_search for logging purposes. + class DummyConfig: + pagination = {} + class DummySearchPlugin: provider = "peps" + config = DummyConfig() sr, estimate = self.dag._do_search(search_plugin=DummySearchPlugin()) self.assertIsInstance(sr, SearchResult) @@ -814,7 +847,7 @@ class DummySearchPlugin: self.assertEqual(estimate, 0) def test__do_search_can_raise_errors(self): - """__do_search must not raise any error by default""" + """_do_search must not raise errors if raise_errors=True""" class DummySearchPlugin: provider = "peps" @@ -825,23 +858,35 @@ class DummySearchPlugin: @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) def test__do_search_query_products_must_be_a_list(self, search_plugin): - """__do_search expects that each search plugin returns a list of products.""" + """_do_search expects that each search plugin returns a list of products.""" search_plugin.provider = "peps" search_plugin.query.return_value = ( - self.search_results, + self.search_results, # This is not a list but a SearchResult self.search_results_size, ) + + class DummyConfig: + pagination = {} + + search_plugin.config = DummyConfig() + with self.assertRaises(PluginImplementationError): self.dag._do_search(search_plugin=search_plugin, raise_errors=True) @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) def test__do_search_register_downloader_if_search_intersection(self, search_plugin): - """__do_search must register each product's downloader if search_intersection is not None""" # noqa + """_do_search must register each product's downloader if search_intersection is not None""" # noqa search_plugin.provider = "peps" search_plugin.query.return_value = ( self.search_results.data, self.search_results_size, ) + + class DummyConfig: + pagination = {} + + search_plugin.config = DummyConfig() + sr, _ = self.dag._do_search(search_plugin=search_plugin) for product in sr: self.assertIsNotNone(product.downloader) @@ -850,11 +895,15 @@ def test__do_search_register_downloader_if_search_intersection(self, search_plug def test__do_search_doest_not_register_downloader_if_no_search_intersection( self, search_plugin ): - """__do_search must not register downloaders if search_intersection is None""" + """_do_search must not register downloaders if search_intersection is None""" class DummyProduct: seach_intersecion = None + class DummyConfig: + pagination = {} + + search_plugin.config = DummyConfig() search_plugin.provider = "peps" search_plugin.query.return_value = ([DummyProduct(), DummyProduct()], 2) sr, _ = self.dag._do_search(search_plugin=search_plugin) From 42e62b05b6c298902ca73e0ba8a5bb13deaf4e2f Mon Sep 17 00:00:00 2001 From: Maxime Liquet Date: Wed, 28 Apr 2021 14:20:19 +0200 Subject: [PATCH 5/5] tests: add a test --- tests/units/test_core.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/units/test_core.py b/tests/units/test_core.py index 8715d59a6..fca2aad98 100644 --- a/tests/units/test_core.py +++ b/tests/units/test_core.py @@ -717,6 +717,27 @@ def test__prepare_search_peps_plugins_product_not_available(self): finally: self.dag.set_preferred_provider(prev_fav_provider) + @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) + def test__do_search_support_itemsperpage_higher_than_maximum(self, search_plugin): + """_do_search must create a count query by default""" + search_plugin.provider = "peps" + search_plugin.query.return_value = ( + self.search_results.data, # a list must be returned by .query + self.search_results_size, + ) + + class DummyConfig: + pagination = {"max_items_per_page": 1} + + search_plugin.config = DummyConfig() + sr, estimate = self.dag._do_search( + search_plugin=search_plugin, + items_per_page=2, + ) + self.assertIsInstance(sr, SearchResult) + self.assertEqual(len(sr), self.search_results_size) + self.assertEqual(estimate, self.search_results_size) + @mock.patch("eodag.plugins.search.qssearch.QueryStringSearch", autospec=True) def test__do_search_counts_by_default(self, search_plugin): """_do_search must create a count query by default"""