From a6af50c2a05a188f012b2c8582889021ae64e445 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Mon, 26 Jul 2021 16:16:15 +0100
Subject: [PATCH 01/16] trivia to satisfy flake8

---
 tests/test_connection.py |  6 +++---
 tests/test_context.py    | 15 ++++++---------
 tests/test_results.py    |  4 ++--
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/tests/test_connection.py b/tests/test_connection.py
index 24e3c9e..37e6388 100644
--- a/tests/test_connection.py
+++ b/tests/test_connection.py
@@ -41,7 +41,7 @@ def test_get_shard_list(self):
         #        replication configuration
         #        on the test server
         assert 'esgf-index1.ceda.ac.uk' in shards
-        # in esg-search in esgf-index1.ceda.ac.uk, there are a bunch 
+        # in esg-search in esgf-index1.ceda.ac.uk, there are a bunch
         # of replicas hosted on esgf-index2
         assert len(shards['esgf-index2.ceda.ac.uk']) > 1
 
@@ -69,7 +69,7 @@ def test_passed_cached_session(self):
         import requests_cache
         td = datetime.timedelta(hours=1)
         session = requests_cache.CachedSession(self.cache,
-                                                    expire_after=td)
+                                               expire_after=td)
         conn = SearchConnection(self.test_service, session=session)
         context = conn.new_context(project='cmip5')
         assert context.facet_constraints['project'] == 'cmip5'
@@ -78,7 +78,7 @@ def test_connection_instance(self):
         import requests_cache
         td = datetime.timedelta(hours=1)
         session = requests_cache.CachedSession(self.cache,
-                                                    expire_after=td)
+                                               expire_after=td)
         with SearchConnection(self.test_service, session=session) as conn:
             context = conn.new_context(project='cmip5')
         assert context.facet_constraints['project'] == 'cmip5'
diff --git a/tests/test_context.py b/tests/test_context.py
index 9724dea..817cf7a 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -96,14 +96,13 @@ def test_facet_count(self):
         self.assertTrue(list(counts['model'].keys()) == ['IPSL-CM5A-LR'])
         self.assertTrue(list(counts['project'].keys()) == ['CMIP5'])
 
-    
     def _test_distrib(self, constraints=None, test_service=None,
                       cache=None):
-        if constraints == None:
-            constraints={}
-        if test_service == None:
+        if constraints is None:
+            constraints = {}
+        if test_service is None:
             test_service = self.test_service
-        
+
         conn1 = SearchConnection(test_service, distrib=False, cache=cache)
         context1 = conn1.new_context(**constraints)
         count1 = context1.hit_count
@@ -114,7 +113,6 @@ def _test_distrib(self, constraints=None, test_service=None,
 
         assert count1 < count2
 
-
     _distrib_constraints_few_facets = {'project': 'CMIP5',
                                        'facets': _test_few_facets}
     _distrib_constraints_all_facets = {'project': 'CMIP5',
@@ -125,8 +123,8 @@ def test_distrib_with_few_facets(self):
 
     @pytest.mark.slow
     @pytest.mark.xfail
-    # Expected failure: with facets=* the distrib=true appears to be 
-    # ignored.  This is observed both on the CEDA and also DKRZ index nodes 
+    # Expected failure: with facets=* the distrib=true appears to be
+    # ignored.  This is observed both on the CEDA and also DKRZ index nodes
     # (the only nodes investigated).
     def test_distrib_with_all_facets(self):
         self._test_distrib(constraints=self._distrib_constraints_all_facets)
@@ -143,7 +141,6 @@ def test_distrib_with_cache_with_all_facets(self):
         self._test_distrib(constraints=self._distrib_constraints_all_facets,
                            cache=self.cache)
 
-
     def test_constrain(self):
         conn = SearchConnection(self.test_service, cache=self.cache)
 
diff --git a/tests/test_results.py b/tests/test_results.py
index 74d18d6..fb3d96a 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -243,7 +243,7 @@ def test_shards_constrain4(self):
 
     def _test_batch_size_has_no_impact_on_results(self, facets=None):
         conn = SearchConnection(self.test_service, distrib=True)
-        
+
         constraints = {
             'mip_era': 'CMIP6',
             'institution_id': 'CCCma',
@@ -252,7 +252,7 @@ def _test_batch_size_has_no_impact_on_results(self, facets=None):
             'variable_id': 'ua',
             'facets': facets}
         ctx = conn.new_context(**constraints)
-            
+
         results = ctx.search(batch_size=50)
         ids_batch_size_50 = sorted(results, key=lambda x: x.dataset_id)
 

From 5ecaad6fc76197ce3703028deaed646c0d06047e Mon Sep 17 00:00:00 2001
From: Bouwe Andela <b.andela@esciencecenter.nl>
Date: Mon, 9 Aug 2021 14:07:31 +0200
Subject: [PATCH 02/16] Improve ignore_facet_check search argument and fix some
 typos

---
 pyesgf/search/context.py | 18 ++++++++++--------
 pyesgf/search/results.py | 26 ++++++++++++++++++--------
 tests/test_results.py    | 10 ++++++++++
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/pyesgf/search/context.py b/pyesgf/search/context.py
index e11a606..75a672e 100644
--- a/pyesgf/search/context.py
+++ b/pyesgf/search/context.py
@@ -84,7 +84,7 @@ def __init__(self, connection, constraints, search_type=None,
         self.freetext_constraint = None
         self.facet_constraints = MultiDict()
         self.temporal_constraint = [from_timestamp, to_timestamp]
-        self.geosplatial_constraint = None
+        self.geospatial_constraint = None
 
         self._update_constraints(constraints)
 
@@ -113,7 +113,9 @@ def search(self, batch_size=DEFAULT_BATCH_SIZE, ignore_facet_check=False,
         Perform the search with current constraints returning a set of results.
 
         :batch_size: The number of results to get per HTTP request.
-        :param constraints: Further constraints for this query.  Equivilent
+        :ignore_facet_check: Do not make an extra HTTP request to populate
+            :py:attr:`~facet_counts` and :py:attr:`~hit_count`.
+        :param constraints: Further constraints for this query.  Equivalent
             to calling ``self.constrain(**constraints).search()``
         :return: A ResultSet for this query
 
@@ -123,7 +125,8 @@ def search(self, batch_size=DEFAULT_BATCH_SIZE, ignore_facet_check=False,
         else:
             sc = self
 
-        sc.__update_counts(ignore_facet_check=ignore_facet_check)
+        if not ignore_facet_check:
+            sc.__update_counts()
 
         return ResultSet(sc, batch_size=batch_size)
 
@@ -140,7 +143,7 @@ def get_download_script(self, **constraints):
         """
         Download a script for downloading all files in the set of results.
 
-        :param constraints: Further constraints for this query. Equivilent
+        :param constraints: Further constraints for this query. Equivalent
             to calling ``self.constrain(**constraints).get_download_script()``
         :return: A string containing the script
         """
@@ -188,7 +191,7 @@ def get_facet_options(self):
 
         return facet_options
 
-    def __update_counts(self, ignore_facet_check=False):
+    def __update_counts(self):
         # If hit_count is set the counts are already retrieved
         if self.__hit_count is not None:
             return
@@ -197,11 +200,10 @@ def __update_counts(self, ignore_facet_check=False):
         self.__hit_count = None
         query_dict = self._build_query()
 
-        if not ignore_facet_check:
-            query_dict['facets'] = '*'
-
         if self.facets:
             query_dict['facets'] = self.facets
+        else:
+            query_dict['facets'] = '*'
 
         response = self.connection.send_search(query_dict, limit=0)
         for facet, counts in (list(response['facet_counts']['facet_fields'].items())):
diff --git a/pyesgf/search/results.py b/pyesgf/search/results.py
index 00a57a3..c7cd137 100644
--- a/pyesgf/search/results.py
+++ b/pyesgf/search/results.py
@@ -36,16 +36,15 @@ def __init__(self, context, batch_size=DEFAULT_BATCH_SIZE, eager=True):
         """
         self.context = context
         self.__batch_size = batch_size
-        self.__batch_cache = [None] * ((len(self) // batch_size) + 1)
-        if eager and len(self) > 0:
+        self.__batch_cache = {}
+        self.__len_cache = None
+        if eager:
             self.__batch_cache[0] = self.__get_batch(0)
 
     def __getitem__(self, index):
         batch_i = index // self.batch_size
         offset = index % self.batch_size
-        if self.__batch_cache[batch_i] is None:
-            self.__batch_cache[batch_i] = self.__get_batch(batch_i)
-        batch = self.__batch_cache[batch_i]
+        batch = self.__get_batch(batch_i)
 
         search_type = self.context.search_type
         ResultClass = _result_classes[search_type]
@@ -54,7 +53,9 @@ def __getitem__(self, index):
         return ResultClass(batch[offset], self.context)
 
     def __len__(self):
-        return self.context.hit_count
+        if self.__len_cache is None:
+            self.__get_batch(0)
+        return self.__len_cache
 
     @property
     def batch_size(self):
@@ -71,6 +72,9 @@ def _build_result(self, result):
         return result
 
     def __get_batch(self, batch_i):
+        if batch_i in self.__batch_cache:
+            return self.__batch_cache[batch_i]
+
         offset = self.batch_size * batch_i
         limit = self.batch_size
 
@@ -79,8 +83,14 @@ def __get_batch(self, batch_i):
                     .send_search(query_dict, limit=limit, offset=offset,
                                  shards=self.context.shards))
 
+        if self.__len_cache is None:
+            self.__len_cache = response['response']['numFound']
+
         # !TODO: strip out results
-        return response['response']['docs']
+        batch = response['response']['docs']
+
+        self.__batch_cache[batch_i] = batch
+        return batch
 
 
 class BaseResult(object):
@@ -259,7 +269,7 @@ class FileResult(BaseResult):
     :property checksum: The checksum of the file
     :property checksum_type: The algorithm used for generating the checksum
     :property filename: The filename
-    :proprty size: The file size in bytes
+    :property size: The file size in bytes
 
     """
     @property
diff --git a/tests/test_results.py b/tests/test_results.py
index 74d18d6..8ae9e83 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -29,6 +29,16 @@ def test_result1(self):
         assert re.match(r'cmip5\.output1\..+\|esgf-data1.ceda.ac.uk',
                         r1.dataset_id)
 
+    def test_result1_ignore_facet_check(self):
+        conn = SearchConnection(self.test_service, distrib=False)
+
+        ctx = conn.new_context(project='CMIP5')
+        results = ctx.search(ignore_facet_check=True)
+
+        r1 = results[0]
+        assert re.match(r'cmip5\.output1\..+\|esgf-data1.ceda.ac.uk',
+                        r1.dataset_id)
+
     def test_file_context(self):
         conn = SearchConnection(self.test_service, distrib=False)
 

From 9e3f30cdae64ee96c807b993f32cedcee2248ef6 Mon Sep 17 00:00:00 2001
From: Bouwe Andela <b.andela@esciencecenter.nl>
Date: Mon, 9 Aug 2021 14:12:57 +0200
Subject: [PATCH 03/16] Minor simplification

---
 pyesgf/search/results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyesgf/search/results.py b/pyesgf/search/results.py
index c7cd137..41729a7 100644
--- a/pyesgf/search/results.py
+++ b/pyesgf/search/results.py
@@ -39,7 +39,7 @@ def __init__(self, context, batch_size=DEFAULT_BATCH_SIZE, eager=True):
         self.__batch_cache = {}
         self.__len_cache = None
         if eager:
-            self.__batch_cache[0] = self.__get_batch(0)
+            self.__get_batch(0)
 
     def __getitem__(self, index):
         batch_i = index // self.batch_size

From 0386d53e249e49b279a2475c0df8778e23ab070f Mon Sep 17 00:00:00 2001
From: Bouwe Andela <b.andela@esciencecenter.nl>
Date: Tue, 10 Aug 2021 11:10:22 +0200
Subject: [PATCH 04/16] Fix another typo

---
 pyesgf/search/results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyesgf/search/results.py b/pyesgf/search/results.py
index 41729a7..471db59 100644
--- a/pyesgf/search/results.py
+++ b/pyesgf/search/results.py
@@ -99,7 +99,7 @@ class BaseResult(object):
 
     Subclasses represent different search types such as File and Dataset.
 
-    :ivar json: The oroginial json representation of the result.
+    :ivar json: The original json representation of the result.
     :ivar context: The SearchContext which generated this result.
     :property urls: a dictionary of the form
                     ``{service: [(url, mime_type), ...], ...}``

From aa240e5ac591fb8987d15a4636e17d7c9cae99a3 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Sat, 21 Aug 2021 18:49:51 +0100
Subject: [PATCH 05/16] add warnings when default facets=* used on distributed
 search

---
 pyesgf/search/context.py | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/pyesgf/search/context.py b/pyesgf/search/context.py
index e11a606..8fee6cd 100644
--- a/pyesgf/search/context.py
+++ b/pyesgf/search/context.py
@@ -8,6 +8,8 @@
 
 """
 
+import os
+import sys
 import copy
 
 from webob.multidict import MultiDict
@@ -76,7 +78,7 @@ def __init__(self, connection, constraints, search_type=None,
         self.connection = connection
         self.__facet_counts = None
         self.__hit_count = None
-
+        self._did_facets_star_warning = False
         if search_type is None:
             search_type = self.DEFAULT_SEARCH_TYPE
 
@@ -197,11 +199,12 @@ def __update_counts(self, ignore_facet_check=False):
         self.__hit_count = None
         query_dict = self._build_query()
 
-        if not ignore_facet_check:
-            query_dict['facets'] = '*'
-
         if self.facets:
             query_dict['facets'] = self.facets
+        elif not ignore_facet_check:
+            query_dict['facets'] = '*'
+            if self.connection.distrib:
+                self._do_facets_star_warning()
 
         response = self.connection.send_search(query_dict, limit=0)
         for facet, counts in (list(response['facet_counts']['facet_fields'].items())):
@@ -211,6 +214,33 @@ def __update_counts(self, ignore_facet_check=False):
 
         self.__hit_count = response['response']['numFound']
 
+    def _do_facets_star_warning(self):
+        env_var_name = 'ESGF_PYCLIENT_NO_FACETS_STAR_WARNING'
+        if env_var_name in os.environ:
+            return
+        if not self._did_facets_star_warning:
+            sys.stderr.write(f'''
+-------------------------------------------------------------------------------
+Warning - defaulting to search with facets=*
+
+This behavior is kept for backward-compatibility, but ESGF indexes might not
+successfully perform a distributed search when this option is used, so some
+results may be missing.  For full results, it is recommended to pass a list of
+facets of interest when instantiating a context object.  For example,
+
+      ctx = conn.new_context(facets=['project', 'experiment_id'])
+
+Only the facets that you specify will be present in the facets_counts dictionary.
+
+This warning is displayed when a distributed search is performed while using the
+facets=* default, a maximum of once per context object.  To suppress this warning,
+set the environment variable {env_var_name} to any value
+or explicitly use  conn.new_context(facets='*')
+
+-------------------------------------------------------------------------------
+''')
+            self._did_facets_star_warning = True
+
     # -------------------------------------------------------------------------
     # Constraint mutation interface
     # These functions update the instance in-place.

From 9fb0ac65d49c7df6a3b2a1d53a6bca93e0882a87 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Mon, 18 Oct 2021 16:00:59 +0100
Subject: [PATCH 06/16] changes to avoid tests causing failure where some
 results are missing, which can happen in particular with facets=* or
 distrib=True

---
 tests/test_context.py | 27 +++++++++++++++++++++------
 tests/test_results.py |  7 ++++++-
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/tests/test_context.py b/tests/test_context.py
index 817cf7a..266ba98 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -10,12 +10,17 @@
 import os
 
 
+_all_facets_explanation = ('tests with facets=* may fail for server-side reasons, '
+                           'so these are marked XFAIL but may sometimes pass')
+
 class TestContext(TestCase):
 
     _test_few_facets = 'project,model,index_node,data_node'
 
     def setUp(self):
         self.test_service = 'http://esgf-data.dkrz.de/esg-search'
+        #self.test_service = 'http://esgf-index1.ceda.ac.uk/esg-search'
+        #self.test_service = 'http://esgf-node.llnl.gov/esg-search'
         self.cache = os.path.join(os.path.dirname(__file__), 'url_cache')
 
     def test_context_freetext(self):
@@ -52,13 +57,17 @@ def test_context_facet_multivalue2(self):
         self.assertTrue(sorted(context2.facet_constraints.getall('model')) == ['IPSL-CM5A-LR', 'IPSL-CM5A-MR'])
 
     def test_context_facet_multivalue3(self):
+        #
+        # use distrib=False here - with distrib=True sometimes results are missing and we can't safely
+        # compare numbers of results from two queries.
+        #
         conn = SearchConnection(self.test_service, cache=self.cache)
         ctx = conn.new_context(project='CMIP5', query='humidity',
-                               experiment='rcp45')
+                               experiment='rcp45', distrib=False)
         hits1 = ctx.hit_count
         self.assertTrue(hits1 > 0)
         ctx2 = conn.new_context(project='CMIP5', query='humidity',
-                                experiment=['rcp45', 'rcp85'])
+                                experiment=['rcp45', 'rcp85'], distrib=False)
         hits2 = ctx2.hit_count
 
         self.assertTrue(hits2 > hits1)
@@ -111,7 +120,13 @@ def _test_distrib(self, constraints=None, test_service=None,
         context2 = conn2.new_context(**constraints)
         count2 = context2.hit_count
 
-        assert count1 < count2
+        #
+        # We would generally expect more counts with distrib=True but sometimes this fails for
+        # server-side reasons, so we use a weaker test here.
+        #
+
+        #assert count1 < count2
+        assert count1 <= count2
 
     _distrib_constraints_few_facets = {'project': 'CMIP5',
                                        'facets': _test_few_facets}
@@ -122,7 +137,7 @@ def test_distrib_with_few_facets(self):
         self._test_distrib(constraints=self._distrib_constraints_few_facets)
 
     @pytest.mark.slow
-    @pytest.mark.xfail
+    @pytest.mark.xfail(reason=_all_facets_explanation)
     # Expected failure: with facets=* the distrib=true appears to be
     # ignored.  This is observed both on the CEDA and also DKRZ index nodes
     # (the only nodes investigated).
@@ -135,7 +150,7 @@ def test_distrib_with_cache_with_few_facets(self):
                            cache=self.cache)
 
     # @pytest.mark.skip(reason="cache fails on python 3.7")
-    @pytest.mark.xfail
+    @pytest.mark.xfail(reason=_all_facets_explanation)
     # Expected failure: see test_distrib_all_facets above
     def test_distrib_with_cache_with_all_facets(self):
         self._test_distrib(constraints=self._distrib_constraints_all_facets,
@@ -209,7 +224,7 @@ def _test_replica(self, facets=None):
     def test_replica_with_few_facets(self):
         self._test_replica(facets=self._test_few_facets)
 
-    @pytest.mark.xfail
+    @pytest.mark.xfail(reason=_all_facets_explanation)
     # Expected failure - same considerations as test_distrib_all_facets
     @pytest.mark.slow
     def test_replica_with_all_facets(self):
diff --git a/tests/test_results.py b/tests/test_results.py
index fb3d96a..64c3562 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -242,7 +242,12 @@ def test_shards_constrain4(self):
             print((j.download_url, j.checksum, j.checksum_type, j.size))
 
     def _test_batch_size_has_no_impact_on_results(self, facets=None):
-        conn = SearchConnection(self.test_service, distrib=True)
+
+        # should work in principle with distrib=True, but use distrib=False
+        # because sometimes returned results misses results from some other indexes
+        # and we don't want this to cause a failure
+
+        conn = SearchConnection(self.test_service, distrib=False)
 
         constraints = {
             'mip_era': 'CMIP6',

From 60be478d1a855cd3c168586f1995ad085c6d1c7b Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Tue, 2 Nov 2021 15:08:33 +0000
Subject: [PATCH 07/16] fix the example given in the facets=* warning message,
 and add info in doc strings (e.g. for sphinx)

---
 pyesgf/search/connection.py |  6 ++++--
 pyesgf/search/context.py    | 14 +++++++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/pyesgf/search/connection.py b/pyesgf/search/connection.py
index 160ec11..089f445 100644
--- a/pyesgf/search/connection.py
+++ b/pyesgf/search/connection.py
@@ -49,8 +49,10 @@ class SearchConnection(object):
         of the ESGF search service excluding the final endpoint name.
         Usually this is http://<hostname>/esg-search
     :ivar distrib: Boolean stating whether searches through this connection are
-        distributed.  I.e. whether the Search service distributes the query to
-        other search peers.
+        distributed.  i.e. whether the Search service distributes the query to
+        other search peers.  See also the documentation for the ``'facets'``
+        argument to ``pyesgf.search.context.SearchContext`` in relation to
+        distributed searches.
     :ivar cache: Path to `sqlite` cache file. Cache expires every hours.
     :ivar timeout: Time (in seconds) before query returns an error.
                    Default: 120s.
diff --git a/pyesgf/search/context.py b/pyesgf/search/context.py
index 8fee6cd..fe87950 100644
--- a/pyesgf/search/context.py
+++ b/pyesgf/search/context.py
@@ -22,8 +22,7 @@
 
 
 class SearchContext(object):
-    """
-    Instances of this class represent the state of a current search.
+    """Instances of this class represent the state of a current search.
     It exposes what facets are available to select and the facet counts
     if they are available.
 
@@ -37,6 +36,15 @@ class SearchContext(object):
 
     :ivar constraints: A dictionary of facet constraints currently in effect.
         ``constraint[facet_name] = [value, value, ...]``
+
+    :ivar facets: A string containing a comma-separated list of facets to be
+        returned (for example ``'source_id,ensemble_id'``). If set, this will
+        be used to select which facet counts to include, as returned in the
+        ``facet_counts`` dictionary.  Defaults to including all available
+        facets, but with distributed searches (where the SearchConnection
+        instance was created with ``distrib=True``), some results may be
+        missing for server-side reasons when requesting all facets, so a
+        warning message will be issued. This contains further details.
     :property facet_counts: A dictionary of available hits with each
         facet value for the search as currently constrained.
         This property returns a dictionary of dictionaries where
@@ -228,7 +236,7 @@ def _do_facets_star_warning(self):
 results may be missing.  For full results, it is recommended to pass a list of
 facets of interest when instantiating a context object.  For example,
 
-      ctx = conn.new_context(facets=['project', 'experiment_id'])
+      ctx = conn.new_context(facets='project,experiment_id')
 
 Only the facets that you specify will be present in the facets_counts dictionary.
 

From 4388525fb9f4469353c81ec9df2277d769df3f47 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Tue, 2 Nov 2021 15:12:18 +0000
Subject: [PATCH 08/16] tiny docstring tweak

---
 pyesgf/search/connection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyesgf/search/connection.py b/pyesgf/search/connection.py
index 089f445..3dd3d81 100644
--- a/pyesgf/search/connection.py
+++ b/pyesgf/search/connection.py
@@ -50,7 +50,7 @@ class SearchConnection(object):
         Usually this is http://<hostname>/esg-search
     :ivar distrib: Boolean stating whether searches through this connection are
         distributed.  i.e. whether the Search service distributes the query to
-        other search peers.  See also the documentation for the ``'facets'``
+        other search peers.  See also the documentation for the ``facets``
         argument to ``pyesgf.search.context.SearchContext`` in relation to
         distributed searches.
     :ivar cache: Path to `sqlite` cache file. Cache expires every hours.

From 6705ea4c809c7c6f151bf20875cec6caa9bdd917 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Wed, 3 Nov 2021 15:15:53 +0000
Subject: [PATCH 09/16] tweak var names in test

---
 tests/test_results.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tests/test_results.py b/tests/test_results.py
index 0766a7f..1e03736 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -13,7 +13,7 @@
 
 class TestResults(TestCase):
 
-    _test_few_facets = 'project,model,index_node,data_node'
+    _test_facets = 'project,model,index_node,data_node'
 
     def setUp(self):
         self.test_service = 'http://esgf-index1.ceda.ac.uk/esg-search'
@@ -278,11 +278,7 @@ def _test_batch_size_has_no_impact_on_results(self, facets=None):
         assert len(ids_batch_size_50) == len(ids_batch_size_100)
 
     @pytest.mark.slow
-    def test_test_batch_size_has_no_impact_on_results_with_few_facets(self):
+    def test_batch_size_has_no_impact_on_results_with_few_facets(self):
         self._test_batch_size_has_no_impact_on_results(
-            facets=self._test_few_facets)
+            facets=self._test_facets)
 
-    @pytest.mark.slow
-    @pytest.mark.xfail
-    def test_test_batch_size_has_no_impact_on_results_with_all_facets(self):
-        self._test_batch_size_has_no_impact_on_results()

From 5f86fe052f8b9c4be1abbcf0af55a58f54e6a19b Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Wed, 3 Nov 2021 15:17:45 +0000
Subject: [PATCH 10/16] stop flake8 moaning

---
 tests/test_context.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/test_context.py b/tests/test_context.py
index 266ba98..d67194e 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -13,14 +13,15 @@
 _all_facets_explanation = ('tests with facets=* may fail for server-side reasons, '
                            'so these are marked XFAIL but may sometimes pass')
 
+
 class TestContext(TestCase):
 
     _test_few_facets = 'project,model,index_node,data_node'
 
     def setUp(self):
         self.test_service = 'http://esgf-data.dkrz.de/esg-search'
-        #self.test_service = 'http://esgf-index1.ceda.ac.uk/esg-search'
-        #self.test_service = 'http://esgf-node.llnl.gov/esg-search'
+        # self.test_service = 'http://esgf-index1.ceda.ac.uk/esg-search'
+        # self.test_service = 'http://esgf-node.llnl.gov/esg-search'
         self.cache = os.path.join(os.path.dirname(__file__), 'url_cache')
 
     def test_context_freetext(self):
@@ -125,7 +126,7 @@ def _test_distrib(self, constraints=None, test_service=None,
         # server-side reasons, so we use a weaker test here.
         #
 
-        #assert count1 < count2
+        # assert count1 < count2
         assert count1 <= count2
 
     _distrib_constraints_few_facets = {'project': 'CMIP5',

From 8b1444894f37892a1b15ebbffc8ed5a54e686c6c Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Wed, 3 Nov 2021 15:21:49 +0000
Subject: [PATCH 11/16] mark test_context_facet_options as xfail (known
 server-side unreliability...)

---
 tests/test_context.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_context.py b/tests/test_context.py
index d67194e..7f0fa6a 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -73,6 +73,7 @@ def test_context_facet_multivalue3(self):
 
         self.assertTrue(hits2 > hits1)
 
+    @pytest.mark.xfail("results may sometimes be missing - may or may not pass")
     def test_context_facet_options(self):
         conn = SearchConnection(self.test_service, cache=self.cache)
         context = conn.new_context(project='CMIP5', model='IPSL-CM5A-LR',

From 2e68cb134f632af520c657c8ab8c5a7eead15915 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Wed, 3 Nov 2021 15:35:56 +0000
Subject: [PATCH 12/16] fix pytest decorator invalid syntax in previous commit

---
 tests/test_context.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_context.py b/tests/test_context.py
index 7f0fa6a..8e73894 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -73,7 +73,7 @@ def test_context_facet_multivalue3(self):
 
         self.assertTrue(hits2 > hits1)
 
-    @pytest.mark.xfail("results may sometimes be missing - may or may not pass")
+    @pytest.mark.xfail(reason="results may sometimes be missing - may or may not pass")
     def test_context_facet_options(self):
         conn = SearchConnection(self.test_service, cache=self.cache)
         context = conn.new_context(project='CMIP5', model='IPSL-CM5A-LR',

From fad406ea4198ff093b965199cc7eb78f1fdf5d12 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Wed, 3 Nov 2021 15:37:02 +0000
Subject: [PATCH 13/16] in test_logon.py, use environment variable for myproxy
 server name, like with username etc

---
 tests/test_logon.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_logon.py b/tests/test_logon.py
index fb5b10f..c0d43f2 100644
--- a/tests/test_logon.py
+++ b/tests/test_logon.py
@@ -17,10 +17,11 @@
 except (ImportError, SyntaxError):
     _has_myproxy = False
 
+
 TEST_USER = os.environ.get('USERNAME')
 TEST_PASSWORD = os.environ.get('PASSWORD')
 TEST_OPENID = os.environ.get('OPENID')
-TEST_MYPROXY = 'slcs.ceda.ac.uk'
+TEST_MYPROXY = os.environ.get('MYPROXY')
 
 
 TEST_DATA_DIR = op.join(op.dirname(__file__), 'data')

From 40c8795cf0bd2636824e1e1c2b215395b678b6d7 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Wed, 3 Nov 2021 15:46:22 +0000
Subject: [PATCH 14/16] silence another flake8 gripe (blank line at end of
 file)

---
 tests/test_results.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_results.py b/tests/test_results.py
index 1e03736..cec6f61 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -281,4 +281,3 @@ def _test_batch_size_has_no_impact_on_results(self, facets=None):
     def test_batch_size_has_no_impact_on_results_with_few_facets(self):
         self._test_batch_size_has_no_impact_on_results(
             facets=self._test_facets)
-

From 48bb2c8f21913da90f38319fe63bfae49dbb36ac Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Wed, 3 Nov 2021 16:00:01 +0000
Subject: [PATCH 15/16] a couple more xfails for tests subject to server-side
 vagaries

---
 tests/test_context.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_context.py b/tests/test_context.py
index 8e73894..bd8e2e7 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -158,6 +158,7 @@ def test_distrib_with_cache_with_all_facets(self):
         self._test_distrib(constraints=self._distrib_constraints_all_facets,
                            cache=self.cache)
 
+    @pytest.mark.xfail("may sometimes fail if server returns incomplete set of results")
     def test_constrain(self):
         conn = SearchConnection(self.test_service, cache=self.cache)
 
@@ -186,6 +187,7 @@ def test_constrain_regression1(self):
         context2 = context.constrain(experiment='historical')
         self.assertTrue('experiment' in context2.facet_constraints)
 
+    @pytest.mark.xfail("may sometimes fail if server returns incomplete set of results")
     def test_negative_facet(self):
         conn = SearchConnection(self.test_service, cache=self.cache)
 

From 3cd87f807c11fbe7070b872878455bbbc3bba148 Mon Sep 17 00:00:00 2001
From: Alan Iwi <alan.iwi@stfc.ac.uk>
Date: Wed, 3 Nov 2021 16:11:54 +0000
Subject: [PATCH 16/16] pytest syntax (again, oops...)

---
 tests/test_context.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_context.py b/tests/test_context.py
index bd8e2e7..0fff43d 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -158,7 +158,7 @@ def test_distrib_with_cache_with_all_facets(self):
         self._test_distrib(constraints=self._distrib_constraints_all_facets,
                            cache=self.cache)
 
-    @pytest.mark.xfail("may sometimes fail if server returns incomplete set of results")
+    @pytest.mark.xfail(reason="may sometimes fail if server returns incomplete set of results")
     def test_constrain(self):
         conn = SearchConnection(self.test_service, cache=self.cache)
 
@@ -187,7 +187,7 @@ def test_constrain_regression1(self):
         context2 = context.constrain(experiment='historical')
         self.assertTrue('experiment' in context2.facet_constraints)
 
-    @pytest.mark.xfail("may sometimes fail if server returns incomplete set of results")
+    @pytest.mark.xfail(reason="may sometimes fail if server returns incomplete set of results")
     def test_negative_facet(self):
         conn = SearchConnection(self.test_service, cache=self.cache)