Merge 6871e79 into c3f16cb

4dn-dcic · Nov 19, 2020 · c7b43ff · c7b43ff
2 parents c3f16cb + 6871e79
commit c7b43ff
Show file tree

Hide file tree

Showing 4 changed files with 198 additions and 11 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 # Note: Various modules refer to this system as "encoded", not "fourfront".
 name = "encoded"
-version = "2.3.2"
+version = "2.3.3"
 description = "4DN-DCIC Fourfront"
 authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
 license = "MIT"

diff --git a/src/encoded/search.py b/src/encoded/search.py
@@ -1241,29 +1241,48 @@ def set_facets(search, facets, search_filters, string_query, request, doc_types,
 
         ## Create the aggregation itself, extend facet with info to pass down to front-end
         agg_name = field.replace('.', '-')
+        agg_type = facet.get('aggregation_type', 'terms')
+        agg_id = agg_type + ':' + agg_name
+        facet_filters = generate_filters_for_terms_agg_from_search_filters(query_field, search_filters, string_query)
 
-        if facet.get('aggregation_type') == 'stats':
+        # handle stats aggregation
+        if agg_type == 'stats':
 
             if is_date_field:
                 facet['field_type'] = 'date'
             elif is_numerical_field:
                 facet['field_type'] = field_schema['type'] or "number"
 
-            aggs[facet['aggregation_type'] + ":" + agg_name] = {
+            aggs[agg_id] = {
+                'aggs': {
+                    "primary_agg": {
+                        'stats': {
+                            'field': query_field
+                        }
+                    }
+                },
+                'filter': {'bool': facet_filters}
+            }
+
+        # handle range aggregation
+        elif agg_type == 'range':
+            ranges = [{k: v for k, v in r.items() if k in ['from', 'to']} for r in facet['ranges']]
+            aggs[agg_id] = {
                 'aggs': {
-                    "primary_agg" : {
-                        'stats' : {
-                            'field' : query_field
+                    'primary_agg': {
+                        'range': {
+                            'field': query_field,
+                            'ranges': ranges
                         }
                     }
                 },
                 'filter': {'bool': facet_filters}
             }
 
-        else: # Default -- facetable terms
+        # default - terms aggregation
+        else:
 
             facet['aggregation_type'] = 'terms'
-            facet_filters = generate_filters_for_terms_agg_from_search_filters(query_field, search_filters, string_query)
             term_aggregation = {
                 "terms" : {
                     'size'    : 100,            # Maximum terms returned (default=10); see https://github.com/10up/ElasticPress/wiki/Working-with-Aggregations
@@ -1401,15 +1420,31 @@ def format_facets(es_results, facets, total, additional_facets, search_frame='em
         result_facet.update({ k:v for k,v in facet.items() if k not in result_facet.keys() })
         used_facets.add(field)
         field_agg_name = field.replace('.', '-')
-        full_agg_name = facet['aggregation_type'] + ':' + field_agg_name
+        agg_type = facet['aggregation_type']
+        full_agg_name = agg_type + ':' + field_agg_name
 
         if full_agg_name in aggregations:
-            if facet['aggregation_type'] == 'stats':
+            if agg_type == 'stats':
                 result_facet['total'] = aggregations[full_agg_name]['doc_count']
                 # Used for fields on which can do range filter on, to provide min + max bounds
                 for k in aggregations[full_agg_name]["primary_agg"].keys():
                     result_facet[k] = aggregations[full_agg_name]["primary_agg"][k]
-            else: # 'terms' assumed.
+
+            elif agg_type == 'range':
+                bucket_location = aggregations[full_agg_name]['primary_agg']
+
+                # TODO - refactor ?
+                # merge bucket labels from ranges into buckets
+                for r in result_facet['ranges']:
+                    for b in bucket_location['buckets']:
+
+                        # if ranges match we found our bucket, propagate doc_count into 'ranges' field
+                        if (r.get('from', -1) == b.get('from', -1)) and (r.get('to', -1) == b.get('to', -1)):
+                            r['doc_count'] = b['doc_count']
+                            break
+
+            # 'terms' assumed
+            else:
                 # Default - terms, range, or histogram buckets. Buckets may not be present
                 result_facet['terms'] = aggregations[full_agg_name]["primary_agg"]["buckets"]
                 # Choosing to show facets with one term for summary info on search it provides

diff --git a/src/encoded/tests/test_search.py b/src/encoded/tests/test_search.py
@@ -867,3 +867,92 @@ def test_search_additional_non_nested_facets(self, many_non_nested_facets, _face
         """
         self.check_and_verify_result(many_non_nested_facets, _facet, n_expected)
 
+
+@pytest.fixture(scope='session')
+def bucket_range_data_raw():
+    """ Returns 10 raw items (i = 0..9), each holding integer fields we will bucket on """
+    return [{
+        'special_integer': i,  # top-level integer
+        'special_object_that_holds_integer': {
+            'embedded_integer': i  # same value, one level down in a single object
+        },
+        'array_of_objects_that_holds_integer': [
+            {
+                'embedded_identifier': 'forward',
+                'embedded_integer': 0 if i < 5 else 9  # first five items hold 0, last five hold 9
+            },
+            {
+                'embedded_identifier': 'reverse',
+                'embedded_integer': 9 if i < 5 else 0  # mirror image of the 'forward' entry
+            },
+        ]
+    } for i in range(10)]
+
+
+@pytest.fixture(scope='module')  # XXX: consider scope further - Will 11/5/2020
+def bucket_range_data(testapp, bucket_range_data_raw):
+    for entry in bucket_range_data_raw:  # POST every raw item; each POST must answer 201
+        testapp.post_json('/TestingBucketRangeFacets', entry, status=201)
+    testapp.post_json('/index', {'record': False})  # presumably triggers indexing so the items are searchable - confirm
+
+
+class TestSearchBucketRangeFacets:
+    """ Tests for 'range' aggregation facets (bucket ranges) on the TestingBucketRangeFacets collection. """
+
+    @staticmethod
+    def verify_facet_counts(facets, expected_fields, expected_cardinality, expected_count):
+        """ For each facet whose 'field' is in expected_fields, asserts the number of ranges and each range's doc_count.
+            Note that the actual range properties are trivial (we are not testing elasticsearch)
+        """
+        for facet in facets:
+            if facet['field'] in expected_fields:  # NOTE: nothing is asserted if no facet matches
+                assert len(facet['ranges']) == expected_cardinality
+                for bucket in facet['ranges']:
+                    assert bucket['doc_count'] == expected_count
+
+    @staticmethod
+    def select_facet(facets, facet_name):  # returns the first facet whose 'field' equals facet_name, else None
+        result = None
+        for facet in facets:
+            if facet['field'] == facet_name:
+                result = facet
+                break
+        return result
+
+    @pytest.fixture(scope='module')
+    def bucket_range_facet_result(self, testapp, bucket_range_data):  # facets of an unfiltered search on the collection
+        return testapp.get('/search/?type=TestingBucketRangeFacets').json['facets']
+
+    @pytest.mark.parametrize('expected_fields, expected_counts', [
+        (['special_integer', 'special_object_that_holds_integer.embedded_integer'], 5),
+        (['array_of_objects_that_holds_integer.embedded_integer'], 10)
+    ])
+    def test_search_bucket_range_simple(self, bucket_range_facet_result, expected_fields, expected_counts):
+        """ Tests searching a collection of documents with varying integer field types that
+            have the same distribution - all of which should give the same results. """
+        self.verify_facet_counts(bucket_range_facet_result, expected_fields, 2, expected_counts)
+
+    @pytest.mark.parametrize('identifier', [
+        'reverse', 'forward'
+    ])
+    def test_search_bucket_range_nested_qualifier(self, testapp, bucket_range_data, identifier):
+        """ Tests aggregating on a nested field while selecting for a field within the nested object. """
+        res = testapp.get('/search/?type=TestingBucketRangeFacets'
+                          '&array_of_objects_that_holds_integer.embedded_identifier=%s' % identifier).json['facets']
+        self.verify_facet_counts(res, ['array_of_objects_that_holds_integer.embedded_integer'],
+                                 2, 10)
+
+    @pytest.mark.parametrize('identifier', [
+        'reverse', 'forward'
+    ])
+    def test_search_bucket_range_nested_qualifier_multi(self, testapp, bucket_range_data, identifier):
+        """ As above, plus a 'from=6' range filter on the nested integer: counts must not change and labels must be kept. """
+        res = testapp.get('/search/?type=TestingBucketRangeFacets'
+                          '&array_of_objects_that_holds_integer.embedded_integer.from=6'
+                          '&array_of_objects_that_holds_integer.embedded_identifier=%s' % identifier).json['facets']
+        self.verify_facet_counts(res, ['array_of_objects_that_holds_integer.embedded_integer'],
+                                 2, 10)
+        facet_with_labels = self.select_facet(res, 'array_of_objects_that_holds_integer.embedded_integer')
+        for r in facet_with_labels['ranges']:
+            assert 'label' in r
+            assert r['label'] in ['Low', 'High']
diff --git a/src/encoded/tests/testing_views.py b/src/encoded/tests/testing_views.py
@@ -386,3 +386,66 @@ class TestingHiddenFacets(Item):
     def non_nested_array_of_objects(self, unfaceted_array_of_objects):
         """ Non-nested view of the unfaceted_array_of_objects field """
         return unfaceted_array_of_objects
+
+
+@collection('testing-bucket-range-facets')
+class TestingBucketRangeFacets(Item):
+    """ Collection for testing BucketRange facets: one integer at three nesting depths, each with a 'range' facet. """
+    item_type = 'testing_bucket_range_facets'
+    schema = {
+        'type': 'object',
+        'properties': {
+            'special_integer': {  # integer directly on the item
+                'type': 'integer'
+            },
+            'special_object_that_holds_integer': {  # integer inside a single embedded object
+                'type': 'object',
+                'properties': {
+                    'embedded_integer': {
+                        'type': 'integer'
+                    }
+                }
+            },
+            'array_of_objects_that_holds_integer': {  # integer inside each object of an array
+                'type': 'array',
+                'items': {
+                    'type': 'object',
+                    'enable_nested': True,  # presumably requests a nested mapping for these objects - confirm
+                    'properties': {
+                        'embedded_identifier': {
+                            'type': 'string'
+                        },
+                        'embedded_integer': {
+                            'type': 'integer'
+                        }
+                    }
+                }
+            }
+        },
+        'facets': {  # every facet below is a 'range' aggregation with two ranges: 0-5 and 5-10
+            'special_integer': {
+                'title': 'Special Integer',
+                'aggregation_type': 'range',
+                'ranges': [
+                    {'from': 0, 'to': 5},
+                    {'from': 5, 'to': 10}
+                ]
+            },
+            'special_object_that_holds_integer.embedded_integer': {
+                'title': 'Single Object Embedded Integer',
+                'aggregation_type': 'range',
+                'ranges': [
+                    {'from': 0, 'to': 5},
+                    {'from': 5, 'to': 10}
+                ]
+            },
+            'array_of_objects_that_holds_integer.embedded_integer': {
+                'title': 'Array of Objects Embedded Integer',
+                'aggregation_type': 'range',
+                'ranges': [  # only this facet attaches a 'label' to each range
+                    {'from': 0, 'to': 5, 'label': 'Low'},
+                    {'from': 5, 'to': 10, 'label': 'High'}
+                ]
+            }
+        }
+    }