Skip to content

Commit

Permalink
Aggregations: The nested aggregator should iterate over the child doc…
Browse files Browse the repository at this point in the history
… ids in ascending order.

The reverse_nested aggregator requires that the emitted doc ids are always in ascending order, which is already enforced on the scorer level,
but this also needs to be enforced on the nested aggrgetor level otherwise incorrect counts are a result.

Closes elastic#7505
Closes elastic#7514
  • Loading branch information
martijnvg committed Aug 29, 2014
1 parent 4a0e7a4 commit c2eb815
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 83 deletions.
Expand Up @@ -107,10 +107,10 @@ public void collect(int parentDoc, long bucketOrd) throws IOException {
}
int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1);
int numChildren = 0;
for (int i = (parentDoc - 1); i > prevParentDoc; i--) {
if (childDocs.get(i)) {
for (int childDocId = prevParentDoc + 1; childDocId < parentDoc; childDocId++) {
if (childDocs.get(childDocId)) {
++numChildren;
collectBucketNoCounts(i, bucketOrd);
collectBucketNoCounts(childDocId, bucketOrd);
}
}
incrementBucketDocCount(numChildren, bucketOrd);
Expand Down
Expand Up @@ -84,7 +84,7 @@ public void setNextReader(AtomicReaderContext reader) {
bucketOrdToLastCollectedParentDoc.clear();
try {
// In ES if parent is deleted, then also the children are deleted, so the child docs this agg receives
// must belong to parent docs that are live. For this reason acceptedDocs can also null here.
// must belong to parent docs that is alive. For this reason acceptedDocs can be null here.
DocIdSet docIdSet = parentFilter.getDocIdSet(reader, null);
if (DocIdSets.isEmpty(docIdSet)) {
parentDocs = null;
Expand Down
Expand Up @@ -70,23 +70,31 @@ public void setupSuiteScopeCluster() throws Exception {
)
);

insertDocs(Arrays.asList("a", "b", "c"), Arrays.asList("1", "2", "3", "4"));
insertDocs(Arrays.asList("b", "c", "d"), Arrays.asList("4", "5", "6", "7"));
insertDocs(Arrays.asList("c", "d", "e"), Arrays.asList("7", "8", "9", "1"));
insertType1(Arrays.asList("a", "b", "c"), Arrays.asList("1", "2", "3", "4"));
insertType1(Arrays.asList("b", "c", "d"), Arrays.asList("4", "5", "6", "7"));
insertType1(Arrays.asList("c", "d", "e"), Arrays.asList("7", "8", "9", "1"));
refresh();
insertDocs(Arrays.asList("a", "e"), Arrays.asList("7", "4", "1", "1"));
insertDocs(Arrays.asList("a", "c"), Arrays.asList("2", "1"));
insertDocs(Arrays.asList("a"), Arrays.asList("3", "4"));
insertType1(Arrays.asList("a", "e"), Arrays.asList("7", "4", "1", "1"));
insertType1(Arrays.asList("a", "c"), Arrays.asList("2", "1"));
insertType1(Arrays.asList("a"), Arrays.asList("3", "4"));
refresh();
insertDocs(Arrays.asList("x", "c"), Arrays.asList("1", "8"));
insertDocs(Arrays.asList("y", "c"), Arrays.asList("6"));
insertDocs(Arrays.asList("z"), Arrays.asList("5", "9"));
insertType1(Arrays.asList("x", "c"), Arrays.asList("1", "8"));
insertType1(Arrays.asList("y", "c"), Arrays.asList("6"));
insertType1(Arrays.asList("z"), Arrays.asList("5", "9"));
refresh();

insertType2(new String[][]{new String[]{"a", "0", "0", "1", "2"}, new String[]{"b", "0", "1", "1", "2"}, new String[]{"a", "0"}});
insertType2(new String[][]{new String[]{"c", "1", "1", "2", "2"}, new String[]{"d", "3", "4"}});
refresh();

insertType2(new String[][]{new String[]{"a", "0", "0", "0", "0"}, new String[]{"b", "0", "0", "0", "0"}});
insertType2(new String[][]{new String[]{"e", "1", "2"}, new String[]{"f", "3", "4"}});
refresh();

ensureSearchable();
}

private void insertDocs(List<String> values1, List<String> values2) throws Exception {
private void insertType1(List<String> values1, List<String> values2) throws Exception {
XContentBuilder source = jsonBuilder()
.startObject()
.array("field1", values1.toArray())
Expand All @@ -96,17 +104,20 @@ private void insertDocs(List<String> values1, List<String> values2) throws Excep
}
source.endArray().endObject();
indexRandom(false, client().prepareIndex("idx", "type1").setRouting("1").setSource(source));
}

source = jsonBuilder()
private void insertType2(String[][] values) throws Exception {
XContentBuilder source = jsonBuilder()
.startObject()
.field("x", "y")
.startArray("nested1").startObject()
.array("field1", values1.toArray())
.startArray("nested2");
for (String value1 : values2) {
source.startObject().field("field2", value1).endObject();
.startArray("nested1");
for (String[] value : values) {
source.startObject().field("field1", value[0]).startArray("nested2");
for (int i = 1; i < value.length; i++) {
source.startObject().field("field2", value[i]).endObject();
}
source.endArray().endObject();
}
source.endArray().endObject().endArray().endObject();
source.endArray().endObject();
indexRandom(false, client().prepareIndex("idx", "type2").setRouting("1").setSource(source));
}

Expand All @@ -126,67 +137,6 @@ public void simple_reverseNestedToRoot() throws Exception {
)
).get();

verifyResults(response);
}

@Test
public void simple_nested1ToRootToNested2() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type2")
.addAggregation(nested("nested1").path("nested1")
.subAggregation(
reverseNested("nested1_to_root")
.subAggregation(nested("root_to_nested2").path("nested1.nested2"))
)
)
.get();

assertSearchResponse(response);
Nested nested = response.getAggregations().get("nested1");
assertThat(nested.getName(), equalTo("nested1"));
assertThat(nested.getDocCount(), equalTo(9l));
ReverseNested reverseNested = nested.getAggregations().get("nested1_to_root");
assertThat(reverseNested.getName(), equalTo("nested1_to_root"));
assertThat(reverseNested.getDocCount(), equalTo(9l));
nested = reverseNested.getAggregations().get("root_to_nested2");
assertThat(nested.getName(), equalTo("root_to_nested2"));
assertThat(nested.getDocCount(), equalTo(25l));
}

@Test
public void simple_reverseNestedToNested1() throws Exception {
SearchResponse response = client().prepareSearch("idx")
.addAggregation(nested("nested1").path("nested1.nested2")
.subAggregation(
terms("field2").field("nested1.nested2.field2")
.collectMode(randomFrom(SubAggCollectionMode.values()))
.subAggregation(
reverseNested("nested1_to_field1").path("nested1")
.subAggregation(
terms("field1").field("nested1.field1")
.collectMode(randomFrom(SubAggCollectionMode.values()))
)
)
)
).get();
verifyResults(response);
}

@Test(expected = SearchPhaseExecutionException.class)
public void testReverseNestedAggWithoutNestedAgg() throws Exception {
client().prepareSearch("idx")
.addAggregation(terms("field2").field("nested1.nested2.field2")
.collectMode(randomFrom(SubAggCollectionMode.values()))
.subAggregation(
reverseNested("nested1_to_field1")
.subAggregation(
terms("field1").field("nested1.field1")
.collectMode(randomFrom(SubAggCollectionMode.values()))
)
)
).get();
}

private void verifyResults(SearchResponse response) {
assertSearchResponse(response);

Nested nested = response.getAggregations().get("nested1");
Expand Down Expand Up @@ -357,4 +307,145 @@ private void verifyResults(SearchResponse response) {
assertThat(tagsBuckets.get(3).getKey(), equalTo("z"));
assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l));
}

@Test
public void simple_nested1ToRootToNested2() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type2")
.addAggregation(nested("nested1").path("nested1")
.subAggregation(
reverseNested("nested1_to_root")
.subAggregation(nested("root_to_nested2").path("nested1.nested2"))
)
)
.get();

assertSearchResponse(response);
Nested nested = response.getAggregations().get("nested1");
assertThat(nested.getName(), equalTo("nested1"));
assertThat(nested.getDocCount(), equalTo(9l));
ReverseNested reverseNested = nested.getAggregations().get("nested1_to_root");
assertThat(reverseNested.getName(), equalTo("nested1_to_root"));
assertThat(reverseNested.getDocCount(), equalTo(4l));
nested = reverseNested.getAggregations().get("root_to_nested2");
assertThat(nested.getName(), equalTo("root_to_nested2"));
assertThat(nested.getDocCount(), equalTo(27l));
}

@Test
public void simple_reverseNestedToNested1() throws Exception {
SearchResponse response = client().prepareSearch("idx").setTypes("type2")
.addAggregation(nested("nested1").path("nested1.nested2")
.subAggregation(
terms("field2").field("nested1.nested2.field2").order(Terms.Order.term(true))
.collectMode(randomFrom(SubAggCollectionMode.values()))
.size(0)
.subAggregation(
reverseNested("nested1_to_field1").path("nested1")
.subAggregation(
terms("field1").field("nested1.field1").order(Terms.Order.term(true))
.collectMode(randomFrom(SubAggCollectionMode.values()))
)
)
)
).get();

assertSearchResponse(response);

Nested nested = response.getAggregations().get("nested1");
assertThat(nested, notNullValue());
assertThat(nested.getName(), equalTo("nested1"));
assertThat(nested.getDocCount(), equalTo(27l));
assertThat(nested.getAggregations().asList().isEmpty(), is(false));

Terms usernames = nested.getAggregations().get("field2");
assertThat(usernames, notNullValue());
assertThat(usernames.getBuckets().size(), equalTo(5));
List<Terms.Bucket> usernameBuckets = new ArrayList<>(usernames.getBuckets());

Terms.Bucket bucket = usernameBuckets.get(0);
assertThat(bucket.getKey(), equalTo("0"));
assertThat(bucket.getDocCount(), equalTo(12l));
ReverseNested reverseNested = bucket.getAggregations().get("nested1_to_field1");
assertThat(reverseNested.getDocCount(), equalTo(5l));
Terms tags = reverseNested.getAggregations().get("field1");
List<Terms.Bucket> tagsBuckets = new ArrayList<>(tags.getBuckets());
assertThat(tagsBuckets.size(), equalTo(2));
assertThat(tagsBuckets.get(0).getKey(), equalTo("a"));
assertThat(tagsBuckets.get(0).getDocCount(), equalTo(3l));
assertThat(tagsBuckets.get(1).getKey(), equalTo("b"));
assertThat(tagsBuckets.get(1).getDocCount(), equalTo(2l));

bucket = usernameBuckets.get(1);
assertThat(bucket.getKey(), equalTo("1"));
assertThat(bucket.getDocCount(), equalTo(6l));
reverseNested = bucket.getAggregations().get("nested1_to_field1");
assertThat(reverseNested.getDocCount(), equalTo(4l));
tags = reverseNested.getAggregations().get("field1");
tagsBuckets = new ArrayList<>(tags.getBuckets());
assertThat(tagsBuckets.size(), equalTo(4));
assertThat(tagsBuckets.get(0).getKey(), equalTo("a"));
assertThat(tagsBuckets.get(0).getDocCount(), equalTo(1l));
assertThat(tagsBuckets.get(1).getKey(), equalTo("b"));
assertThat(tagsBuckets.get(1).getDocCount(), equalTo(1l));
assertThat(tagsBuckets.get(2).getKey(), equalTo("c"));
assertThat(tagsBuckets.get(2).getDocCount(), equalTo(1l));
assertThat(tagsBuckets.get(3).getKey(), equalTo("e"));
assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l));

bucket = usernameBuckets.get(2);
assertThat(bucket.getKey(), equalTo("2"));
assertThat(bucket.getDocCount(), equalTo(5l));
reverseNested = bucket.getAggregations().get("nested1_to_field1");
assertThat(reverseNested.getDocCount(), equalTo(4l));
tags = reverseNested.getAggregations().get("field1");
tagsBuckets = new ArrayList<>(tags.getBuckets());
assertThat(tagsBuckets.size(), equalTo(4));
assertThat(tagsBuckets.get(0).getKey(), equalTo("a"));
assertThat(tagsBuckets.get(0).getDocCount(), equalTo(1l));
assertThat(tagsBuckets.get(1).getKey(), equalTo("b"));
assertThat(tagsBuckets.get(1).getDocCount(), equalTo(1l));
assertThat(tagsBuckets.get(2).getKey(), equalTo("c"));
assertThat(tagsBuckets.get(2).getDocCount(), equalTo(1l));
assertThat(tagsBuckets.get(3).getKey(), equalTo("e"));
assertThat(tagsBuckets.get(3).getDocCount(), equalTo(1l));

bucket = usernameBuckets.get(3);
assertThat(bucket.getKey(), equalTo("3"));
assertThat(bucket.getDocCount(), equalTo(2l));
reverseNested = bucket.getAggregations().get("nested1_to_field1");
assertThat(reverseNested.getDocCount(), equalTo(2l));
tags = reverseNested.getAggregations().get("field1");
tagsBuckets = new ArrayList<>(tags.getBuckets());
assertThat(tagsBuckets.size(), equalTo(2));
assertThat(tagsBuckets.get(0).getKey(), equalTo("d"));
assertThat(tagsBuckets.get(0).getDocCount(), equalTo(1l));
assertThat(tagsBuckets.get(1).getKey(), equalTo("f"));

bucket = usernameBuckets.get(4);
assertThat(bucket.getKey(), equalTo("4"));
assertThat(bucket.getDocCount(), equalTo(2l));
reverseNested = bucket.getAggregations().get("nested1_to_field1");
assertThat(reverseNested.getDocCount(), equalTo(2l));
tags = reverseNested.getAggregations().get("field1");
tagsBuckets = new ArrayList<>(tags.getBuckets());
assertThat(tagsBuckets.size(), equalTo(2));
assertThat(tagsBuckets.get(0).getKey(), equalTo("d"));
assertThat(tagsBuckets.get(0).getDocCount(), equalTo(1l));
assertThat(tagsBuckets.get(1).getKey(), equalTo("f"));
}

@Test(expected = SearchPhaseExecutionException.class)
public void testReverseNestedAggWithoutNestedAgg() throws Exception {
client().prepareSearch("idx")
.addAggregation(terms("field2").field("nested1.nested2.field2")
.collectMode(randomFrom(SubAggCollectionMode.values()))
.subAggregation(
reverseNested("nested1_to_field1")
.subAggregation(
terms("field1").field("nested1.field1")
.collectMode(randomFrom(SubAggCollectionMode.values()))
)
)
).get();
}
}

0 comments on commit c2eb815

Please sign in to comment.