Skip to content

Commit

Permalink
Merge 6871e79 into c3f16cb
Browse files Browse the repository at this point in the history
  • Loading branch information
willronchetti committed Nov 19, 2020
2 parents c3f16cb + 6871e79 commit c7b43ff
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 11 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
# Note: Various modules refer to this system as "encoded", not "fourfront".
name = "encoded"
version = "2.3.2"
version = "2.3.3"
description = "4DN-DCIC Fourfront"
authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
license = "MIT"
Expand Down
55 changes: 45 additions & 10 deletions src/encoded/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,29 +1241,48 @@ def set_facets(search, facets, search_filters, string_query, request, doc_types,

## Create the aggregation itself, extend facet with info to pass down to front-end
agg_name = field.replace('.', '-')
agg_type = facet.get('aggregation_type', 'terms')
agg_id = agg_type + ':' + agg_name
facet_filters = generate_filters_for_terms_agg_from_search_filters(query_field, search_filters, string_query)

if facet.get('aggregation_type') == 'stats':
# handle stats aggregation
if agg_type == 'stats':

if is_date_field:
facet['field_type'] = 'date'
elif is_numerical_field:
facet['field_type'] = field_schema['type'] or "number"

aggs[facet['aggregation_type'] + ":" + agg_name] = {
aggs[agg_id] = {
'aggs': {
"primary_agg": {
'stats': {
'field': query_field
}
}
},
'filter': {'bool': facet_filters}
}

# handle range aggregation
elif agg_type == 'range':
ranges = [{k: v for k, v in r.items() if k in ['from', 'to']} for r in facet['ranges']]
aggs[agg_id] = {
'aggs': {
"primary_agg" : {
'stats' : {
'field' : query_field
'primary_agg': {
'range': {
'field': query_field,
'ranges': ranges
}
}
},
'filter': {'bool': facet_filters}
}

else: # Default -- facetable terms
# default - terms aggregation
else:

facet['aggregation_type'] = 'terms'
facet_filters = generate_filters_for_terms_agg_from_search_filters(query_field, search_filters, string_query)
term_aggregation = {
"terms" : {
'size' : 100, # Maximum terms returned (default=10); see https://github.com/10up/ElasticPress/wiki/Working-with-Aggregations
Expand Down Expand Up @@ -1401,15 +1420,31 @@ def format_facets(es_results, facets, total, additional_facets, search_frame='em
result_facet.update({ k:v for k,v in facet.items() if k not in result_facet.keys() })
used_facets.add(field)
field_agg_name = field.replace('.', '-')
full_agg_name = facet['aggregation_type'] + ':' + field_agg_name
agg_type = facet['aggregation_type']
full_agg_name = agg_type + ':' + field_agg_name

if full_agg_name in aggregations:
if facet['aggregation_type'] == 'stats':
if agg_type == 'stats':
result_facet['total'] = aggregations[full_agg_name]['doc_count']
# Used for fields on which can do range filter on, to provide min + max bounds
for k in aggregations[full_agg_name]["primary_agg"].keys():
result_facet[k] = aggregations[full_agg_name]["primary_agg"][k]
else: # 'terms' assumed.

elif agg_type == 'range':
bucket_location = aggregations[full_agg_name]['primary_agg']

# TODO - refactor ?
# merge bucket labels from ranges into buckets
for r in result_facet['ranges']:
for b in bucket_location['buckets']:

# if ranges match we found our bucket, propagate doc_count into 'ranges' field
if (r.get('from', -1) == b.get('from', -1)) and (r.get('to', -1) == b.get('to', -1)):
r['doc_count'] = b['doc_count']
break

# 'terms' assumed
else:
# Default - terms, range, or histogram buckets. Buckets may not be present
result_facet['terms'] = aggregations[full_agg_name]["primary_agg"]["buckets"]
# Choosing to show facets with one term for summary info on search it provides
Expand Down
89 changes: 89 additions & 0 deletions src/encoded/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,3 +867,92 @@ def test_search_additional_non_nested_facets(self, many_non_nested_facets, _face
"""
self.check_and_verify_result(many_non_nested_facets, _facet, n_expected)


@pytest.fixture(scope='session')
def bucket_range_data_raw():
    """Build the 10 raw items whose integer fields the range-facet tests bucket on.

    Item ``i`` (0 through 9) stores ``i`` both at top level and inside an embedded
    object. Its array of objects holds a 'forward' entry (0 for the first five
    items, 9 for the rest) and a 'reverse' entry carrying the opposite value.
    """
    items = []
    for value in range(10):
        in_lower_half = value < 5
        forward_entry = {
            'embedded_identifier': 'forward',
            'embedded_integer': 0 if in_lower_half else 9,
        }
        reverse_entry = {
            'embedded_identifier': 'reverse',
            'embedded_integer': 9 if in_lower_half else 0,
        }
        items.append({
            'special_integer': value,
            'special_object_that_holds_integer': {'embedded_integer': value},
            'array_of_objects_that_holds_integer': [forward_entry, reverse_entry],
        })
    return items


@pytest.fixture(scope='module')  # XXX: consider scope further - Will 11/5/2020
def bucket_range_data(testapp, bucket_range_data_raw):
    """POST every raw bucket-range item, then POST to '/index'.

    Each item POST must answer with status 201. The final '/index' request is
    sent with ``{'record': False}``. Nothing is returned; the fixture exists for
    its side effects on the test application.
    """
    collection_url = '/TestingBucketRangeFacets'
    for item in bucket_range_data_raw:
        testapp.post_json(collection_url, item, status=201)
    index_payload = {'record': False}
    testapp.post_json('/index', index_payload)


class TestSearchBucketRangeFacets:
    """ Class that encapsulates tests for BucketRanges (facets using the 'range' aggregation type) """

    @staticmethod
    def verify_facet_counts(facets, expected_fields, expected_cardinality, expected_count):
        """ Checks for given expected facets, checking bucket cardinality and document count
            Note that the actual range properties are trivial (we are not testing elasticsearch)

        :param facets: list of facet dicts as returned under 'facets' in a search response
        :param expected_fields: facet 'field' values to check; all other facets are ignored
        :param expected_cardinality: number of entries each checked facet must have in 'ranges'
        :param expected_count: 'doc_count' every range of a checked facet must report

        NOTE: a field listed in expected_fields that is absent from facets is not
        reported; only facets that are actually present get checked.
        """
        for facet in facets:
            if facet['field'] in expected_fields:
                assert len(facet['ranges']) == expected_cardinality
                for bucket in facet['ranges']:
                    assert bucket['doc_count'] == expected_count

    @staticmethod
    def select_facet(facets, facet_name):
        """ Returns the first facet whose 'field' equals facet_name, or None if there is none. """
        return next((facet for facet in facets if facet['field'] == facet_name), None)

    @pytest.fixture(scope='module')
    def bucket_range_facet_result(self, testapp, bucket_range_data):
        """ Facets from an unfiltered search over TestingBucketRangeFacets. """
        return testapp.get('/search/?type=TestingBucketRangeFacets').json['facets']

    @pytest.mark.parametrize('expected_fields, expected_counts', [
        (['special_integer', 'special_object_that_holds_integer.embedded_integer'], 5),
        (['array_of_objects_that_holds_integer.embedded_integer'], 10)
    ])
    def test_search_bucket_range_simple(self, bucket_range_facet_result, expected_fields, expected_counts):
        """ Tests searching a collection of documents with varying integer field types that
            have the same distribution - all of which should give the same results. """
        self.verify_facet_counts(bucket_range_facet_result, expected_fields, 2, expected_counts)

    @pytest.mark.parametrize('identifier', [
        'reverse', 'forward'
    ])
    def test_search_bucket_range_nested_qualifier(self, testapp, bucket_range_data, identifier):
        """ Tests aggregating on a nested field while selecting for a field within the nested object. """
        res = testapp.get('/search/?type=TestingBucketRangeFacets'
                          '&array_of_objects_that_holds_integer.embedded_identifier=%s' % identifier).json['facets']
        self.verify_facet_counts(res, ['array_of_objects_that_holds_integer.embedded_integer'],
                                 2, 10)

    # This test previously reused the name of the test above. Because a class body
    # rebinds a repeated name, the earlier test was silently dropped and never
    # collected. It has a distinct name now so that both tests run.
    @pytest.mark.parametrize('identifier', [
        'reverse', 'forward'
    ])
    def test_search_bucket_range_nested_qualifier_with_range_filter(self, testapp, bucket_range_data, identifier):
        """ Tests aggregating on a nested field while selecting for a field within the nested object
            and additionally applying a 'from' filter on the aggregated field itself (no change to
            the expected counts). Also checks that the range labels are passed through. """
        res = testapp.get('/search/?type=TestingBucketRangeFacets'
                          '&array_of_objects_that_holds_integer.embedded_integer.from=6'
                          '&array_of_objects_that_holds_integer.embedded_identifier=%s' % identifier).json['facets']
        self.verify_facet_counts(res, ['array_of_objects_that_holds_integer.embedded_integer'],
                                 2, 10)
        facet_with_labels = self.select_facet(res, 'array_of_objects_that_holds_integer.embedded_integer')
        # Fail with a clear assertion instead of a TypeError if the facet is missing
        assert facet_with_labels is not None
        for r in facet_with_labels['ranges']:
            assert 'label' in r
            assert r['label'] in ['Low', 'High']
63 changes: 63 additions & 0 deletions src/encoded/tests/testing_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,66 @@ class TestingHiddenFacets(Item):
def non_nested_array_of_objects(self, unfaceted_array_of_objects):
""" Non-nested view of the unfaceted_array_of_objects field """
return unfaceted_array_of_objects


@collection('testing-bucket-range-facets')
class TestingBucketRangeFacets(Item):
    """ Test-only item type whose integer fields are exposed as 'range' facets. """
    item_type = 'testing_bucket_range_facets'
    schema = {
        'type': 'object',
        'properties': {
            # integer stored directly on the item
            'special_integer': {'type': 'integer'},
            # integer stored inside a single embedded object
            'special_object_that_holds_integer': {
                'type': 'object',
                'properties': {
                    'embedded_integer': {'type': 'integer'}
                }
            },
            # integer stored inside each element of an array of objects (nested enabled)
            'array_of_objects_that_holds_integer': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'enable_nested': True,
                    'properties': {
                        'embedded_identifier': {'type': 'string'},
                        'embedded_integer': {'type': 'integer'}
                    }
                }
            }
        },
        # Every facet declares its own 'ranges' list; the lists are intentionally not shared.
        'facets': {
            'special_integer': {
                'title': 'Special Integer',
                'aggregation_type': 'range',
                'ranges': [
                    {'from': 0, 'to': 5},
                    {'from': 5, 'to': 10}
                ]
            },
            'special_object_that_holds_integer.embedded_integer': {
                'title': 'Single Object Embedded Integer',
                'aggregation_type': 'range',
                'ranges': [
                    {'from': 0, 'to': 5},
                    {'from': 5, 'to': 10}
                ]
            },
            # only this facet attaches human-readable labels to its ranges
            'array_of_objects_that_holds_integer.embedded_integer': {
                'title': 'Array of Objects Embedded Integer',
                'aggregation_type': 'range',
                'ranges': [
                    {'from': 0, 'to': 5, 'label': 'Low'},
                    {'from': 5, 'to': 10, 'label': 'High'}
                ]
            }
        }
    }

0 comments on commit c7b43ff

Please sign in to comment.