-
Notifications
You must be signed in to change notification settings - Fork 1k
/
ESGroupBy.java
128 lines (107 loc) · 6.29 KB
/
ESGroupBy.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package org.graylog.plugins.enterprise.search.elasticsearch.searchtypes;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;
import io.searchbox.core.SearchResult;
import io.searchbox.core.search.aggregation.MetricAggregation;
import io.searchbox.core.search.aggregation.TermsAggregation;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.sort.SortOrder;
import org.graylog.plugins.enterprise.search.Query;
import org.graylog.plugins.enterprise.search.SearchJob;
import org.graylog.plugins.enterprise.search.SearchType;
import org.graylog.plugins.enterprise.search.elasticsearch.ESGeneratedQueryContext;
import org.graylog.plugins.enterprise.search.searchtypes.GroupBy;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
/**
 * Elasticsearch handler for the {@link GroupBy} search type.
 *
 * <p>The query side builds a terms aggregation wrapped in a filter aggregation; the result side turns the
 * returned terms buckets into a {@link GroupBy.Result}. When more than one field is requested ("stacked"
 * group-by), the values of all fields are concatenated by a Painless script into one bucket key, joined with
 * {@link #STACKED_TERMS_AGG_SEPARATOR}, and split up again when the result is extracted.
 */
public class ESGroupBy implements ESSearchTypeHandler<GroupBy> {
    // This is the "WORD SEPARATOR MIDDLE DOT" unicode character. It's used to join and split the term values in a
    // stacked group-by query.
    private static final String STACKED_TERMS_AGG_SEPARATOR = "\u2E31";

    /**
     * Builds the aggregation for a group-by: a filter aggregation named {@link #filterAggName(GroupBy)} that
     * contains a terms aggregation named {@link #termsAggName(GroupBy)}.
     *
     * @param field         the main field to group by
     * @param stackedFields additional fields to group by; may be empty
     * @param groupBy       the search type providing limit, sort order and id
     * @return the filter aggregation wrapping the terms aggregation
     */
    AbstractAggregationBuilder createTermsBuilder(String field, List<String> stackedFields, GroupBy groupBy) {
        final int size = Ints.saturatedCast(groupBy.limit());
        final Terms.Order termsOrder = Terms.Order.count(groupBy.order() == SortOrder.ASC);

        if (stackedFields.isEmpty()) {
            // Wrap terms aggregation in a no-op filter to make sure the result structure is correct when not having
            // stacked fields.
            return AggregationBuilders.filter(filterAggName(groupBy), QueryBuilders.matchAllQuery())
                    .subAggregation(AggregationBuilders.terms(termsAggName(groupBy))
                            .field(field)
                            .size(size)
                            .order(termsOrder));
        }

        // If the method gets stacked fields, we have to use scripting to concatenate the fields.
        // There is currently no other way to do this. (as of ES 5.6)
        final StringBuilder scriptStringBuilder = new StringBuilder();

        // Build a filter for the terms aggregation to make sure we only get terms for messages where all fields
        // exist.
        final BoolQueryBuilder filterQuery = QueryBuilders.boolQuery();

        // Add the main field
        filterQuery.must(QueryBuilders.existsQuery(field));
        scriptStringBuilder.append(docValueAccessor(field));

        // Add all other fields
        for (final String stackedField : stackedFields) {
            filterQuery.must(QueryBuilders.existsQuery(stackedField));
            // There is no way to use some kind of structured value for the stacked fields in the painless script
            // so we have to use a "special" character (that is hopefully not showing up in any value) to join the
            // stacked field values. That allows us to split the result again later to create a field->value mapping.
            scriptStringBuilder.append(" + \"").append(STACKED_TERMS_AGG_SEPARATOR).append("\" + ");
            scriptStringBuilder.append(docValueAccessor(stackedField));
        }

        return AggregationBuilders.filter(filterAggName(groupBy), filterQuery)
                .subAggregation(AggregationBuilders.terms(termsAggName(groupBy))
                        .script(new Script(ScriptType.INLINE, "painless", scriptStringBuilder.toString(), Collections.emptyMap()))
                        .size(size)
                        .order(termsOrder));
    }

    /**
     * Returns the Painless expression {@code doc['<fieldName>'].value} for the given field.
     *
     * <p>The field name ends up inside a single-quoted Painless string literal, so backslashes and single quotes
     * are escaped. Without this, a field name containing either character would break the script or allow the
     * name to inject script code.
     */
    private static String docValueAccessor(String fieldName) {
        // Backslashes must be escaped first, otherwise the backslash added for a quote would be doubled.
        final String escapedName = fieldName.replace("\\", "\\\\").replace("'", "\\'");
        return "doc['" + escapedName + "'].value";
    }

    /**
     * Adds the group-by aggregation to the search source of the given query context. The first entry of
     * {@code groupBy.fields()} is the main field, all remaining entries are stacked fields.
     *
     * <p>Expects {@code groupBy.fields()} to contain at least one field; {@code get(0)} fails otherwise.
     */
    @Override
    public void doGenerateQueryPart(SearchJob job, Query query, GroupBy groupBy, ESGeneratedQueryContext queryContext) {
        final List<String> allFields = groupBy.fields();
        final String mainField = allFields.get(0);
        final List<String> stackedFields = allFields.subList(1, allFields.size());

        queryContext.searchSourceBuilder().aggregation(createTermsBuilder(mainField, stackedFields, groupBy));
    }

    /**
     * Looks up the terms aggregation (nested in the filter aggregation) that belongs to this group-by and
     * converts it into a {@link GroupBy.Result}.
     */
    @Override
    public SearchType.Result doExtractResult(SearchJob job, Query query, GroupBy groupBy, SearchResult queryResult, MetricAggregation aggregations, ESGeneratedQueryContext queryContext) {
        final TermsAggregation termsAggregation = aggregations
                .getFilterAggregation(filterAggName(groupBy))
                .getTermsAggregation(termsAggName(groupBy));

        return extractTermsAggregationResult(groupBy, termsAggregation);
    }

    /**
     * Converts the buckets of a terms aggregation into groups. Each bucket key is split on
     * {@link #STACKED_TERMS_AGG_SEPARATOR} and the parts are assigned, in order, to the fields of the group-by.
     *
     * <p>The key is split into at most as many parts as there are fields, so the last field receives the
     * remainder of the key. This keeps a single-field value that happens to contain the separator intact instead
     * of truncating it at the first separator.
     *
     * @param groupBy          the search type providing the field names and the result id
     * @param termsAggregation the terms aggregation returned by Elasticsearch
     * @return the result containing one group per bucket
     */
    GroupBy.Result extractTermsAggregationResult(GroupBy groupBy, TermsAggregation termsAggregation) {
        final List<String> fieldNames = groupBy.fields();
        final int fieldCount = fieldNames.size();
        // Splitter.limit() rejects values below 1, so guard the (degenerate) case of an empty field list.
        final Splitter keySplitter = Splitter.on(STACKED_TERMS_AGG_SEPARATOR).limit(Math.max(1, fieldCount));

        final ImmutableList.Builder<GroupBy.Group> groups = ImmutableList.builder();

        for (final TermsAggregation.Entry entry : termsAggregation.getBuckets()) {
            // Use the "special" character to split up the terms value so we can create a field->value mapping.
            final List<String> valueList = keySplitter.splitToList(entry.getKey());

            // For every field in the field list, get the value from the split up terms value list. After this, we
            // have a mapping of field->value for each bucket.
            final ImmutableList.Builder<GroupBy.GroupField> fields = ImmutableList.builder();
            for (int i = 0; i < fieldCount; i++) {
                fields.add(GroupBy.GroupField.builder()
                        .field(fieldNames.get(i))
                        .value(valueList.get(i))
                        .build());
            }

            groups.add(GroupBy.Group.builder()
                    .count(entry.getCount())
                    .fields(fields.build())
                    .build());
        }

        return GroupBy.Result.builder()
                .id(groupBy.id())
                .groups(groups.build())
                .build();
    }

    /** Returns the name of the filter aggregation for the given group-by, derived from its id. */
    String filterAggName(GroupBy groupBy) {
        return String.format(Locale.ENGLISH, "group-by-filter-%s", groupBy.id());
    }

    /** Returns the name of the terms aggregation for the given group-by, derived from its id. */
    String termsAggName(GroupBy groupBy) {
        return String.format(Locale.ENGLISH, "group-by-terms-%s", groupBy.id());
    }
}