elastic · martijnvg · May 8, 2014 · May 22, 2014 · May 22, 2014 · May 22, 2014
diff --git a/docs/reference/search/aggregations/bucket.asciidoc b/docs/reference/search/aggregations/bucket.asciidoc
@@ -26,4 +26,6 @@ include::bucket/datehistogram-aggregation.asciidoc[]
 
 include::bucket/geodistance-aggregation.asciidoc[]
 
-include::bucket/geohashgrid-aggregation.asciidoc[]
+include::bucket/geohashgrid-aggregation.asciidoc[]
+
+include::bucket/tophits-aggregation.asciidoc[]
diff --git a/docs/reference/search/aggregations/bucket/tophits-aggregation.asciidoc b/docs/reference/search/aggregations/bucket/tophits-aggregation.asciidoc
@@ -0,0 +1,147 @@
+[[search-aggregations-bucket-top-hits-aggregation]]
+=== Top hits Aggregation
+
+coming[1.3.0]
+
+The `top_hits` aggregator keeps track of the most relevant documents being aggregated. This aggregator is intended to be
+used as a sub aggregator, so that the top matching documents can be aggregated per bucket.
+
+The `top_hits` aggregator can effectively be used to group result sets by certain fields via a bucket aggregator.
+One or more bucket aggregators determine the properties by which a result set is sliced.
+
+.Options:
+* `size` - The maximum number of top matching hits to return per bucket. By default the top three matching hits are returned.
+* `sort` - How the top matching hits should be sorted. By default the hits are sorted by the score of the main query.
+
+.Supported per hit features
+
+The `top_hits` aggregation returns regular search hits; because of this, many per-hit features are supported:
+* {ref}/search-request-highlighting.html[Highlighting]
+* {ref}/search-request-explain.html[Explain]
+* {ref}/search-request-named-queries-and-filters.html[Named filters and queries]
+* {ref}/search-request-source-filtering.html[Source filtering]
+* {ref}/search-request-script-fields.html[Script fields]
+* {ref}/search-request-fielddata-fields.html[Fielddata fields]
+* {ref}/search-request-version.html[Include versions]
+
+.Example
+
+In the following example we group the questions by tag and per tag we show the last active question. For each question
+only the title field is being included in the source.
+
+[source,js]
+--------------------------------------------------
+{
+    "aggs": {
+        "top-tags": {
+            "terms": {
+                "field": "tags",
+                "size": 3
+            },
+            "aggs": {
+                "top_tags_hits": {
+                    "top_hits": {
+                        "sort": [
+                            {
+                                "last_activity_date": {
+                                    "order": "desc"
+                                }
+                            }
+                        ],
+                        "_source": {
+                            "include": [
+                                "title"
+                            ]
+                        },
+                        "size" : 1
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+
+Possible response snippet:
+
+[source,js]
+--------------------------------------------------
+"aggregations": {
+  "top-tags": {
+     "buckets": [
+        {
+           "key": "windows-7",
+           "doc_count": 25365,
+           "top_tags_hits": {
+              "hits": {
+                 "total": 25365,
+                 "max_score": 1,
+                 "hits": [
+                    {
+                       "_index": "stack",
+                       "_type": "question",
+                       "_id": "602679",
+                       "_score": 1,
+                       "_source": {
+                          "title": "Windows port opening"
+                       },
+                       "sort": [
+                          1370143231177
+                       ]
+                    }
+                 ]
+              }
+           }
+        },
+        {
+           "key": "linux",
+           "doc_count": 18342,
+           "top_tags_hits": {
+              "hits": {
+                 "total": 18342,
+                 "max_score": 1,
+                 "hits": [
+                    {
+                       "_index": "stack",
+                       "_type": "question",
+                       "_id": "602672",
+                       "_score": 1,
+                       "_source": {
+                          "title": "Ubuntu RFID Screensaver lock-unlock"
+                       },
+                       "sort": [
+                          1370143379747
+                       ]
+                    }
+                 ]
+              }
+           }
+        },
+        {
+           "key": "windows",
+           "doc_count": 18119,
+           "top_tags_hits": {
+              "hits": {
+                 "total": 18119,
+                 "max_score": 1,
+                 "hits": [
+                    {
+                       "_index": "stack",
+                       "_type": "question",
+                       "_id": "602678",
+                       "_score": 1,
+                       "_source": {
+                          "title": "If I change my computers date / time, what could be affected?"
+                       },
+                       "sort": [
+                          1370142868283
+                       ]
+                    }
+                 ]
+              }
+           }
+        }
+     ]
+  }
+}
+--------------------------------------------------
diff --git a/src/main/java/org/elasticsearch/search/aggregations/AggregationBuilders.java b/src/main/java/org/elasticsearch/search/aggregations/AggregationBuilders.java
@@ -32,6 +32,7 @@
 import org.elasticsearch.search.aggregations.bucket.nested.ReverseNestedBuilder;
 import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsBuilder;
 import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
+import org.elasticsearch.search.aggregations.bucket.tophits.TopHitsBuilder;
 import org.elasticsearch.search.aggregations.metrics.avg.AvgBuilder;
 import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityBuilder;
 import org.elasticsearch.search.aggregations.metrics.max.MaxBuilder;
@@ -141,4 +142,8 @@ public static PercentilesBuilder percentiles(String name) {
     public static CardinalityBuilder cardinality(String name) {
         return new CardinalityBuilder(name);
     }
+
+    public static TopHitsBuilder topHits(String name) {
+        return new TopHitsBuilder(name); // fresh builder carrying the given aggregation name
+    }
 }
diff --git a/src/main/java/org/elasticsearch/search/aggregations/AggregationModule.java b/src/main/java/org/elasticsearch/search/aggregations/AggregationModule.java
@@ -35,6 +35,7 @@
 import org.elasticsearch.search.aggregations.bucket.nested.ReverseNestedParser;
 import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsParser;
 import org.elasticsearch.search.aggregations.bucket.terms.TermsParser;
+import org.elasticsearch.search.aggregations.bucket.tophits.TopHitsParser;
 import org.elasticsearch.search.aggregations.metrics.avg.AvgParser;
 import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityParser;
 import org.elasticsearch.search.aggregations.metrics.max.MaxParser;
@@ -79,6 +80,7 @@ public AggregationModule() {
         parsers.add(GeoHashGridParser.class);
         parsers.add(NestedParser.class);
         parsers.add(ReverseNestedParser.class);
+        parsers.add(TopHitsParser.class);
     }
 
     /**

diff --git a/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java b/src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java
@@ -38,6 +38,7 @@
 import org.elasticsearch.search.aggregations.bucket.terms.LongTerms;
 import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
 import org.elasticsearch.search.aggregations.bucket.terms.UnmappedTerms;
+import org.elasticsearch.search.aggregations.bucket.tophits.InternalTopHits;
 import org.elasticsearch.search.aggregations.metrics.avg.InternalAvg;
 import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardinality;
 import org.elasticsearch.search.aggregations.metrics.max.InternalMax;
@@ -87,5 +88,6 @@ protected void configure() {
         InternalGeoDistance.registerStream();
         InternalNested.registerStream();
         InternalReverseNested.registerStream();
+        InternalTopHits.registerStreams();
     }
 }
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/InternalTopHits.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/InternalTopHits.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.aggregations.bucket.tophits;
+
+import org.apache.lucene.search.*;
+import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Streamable;
+import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.search.SearchHits;
+import org.elasticsearch.search.aggregations.AggregationStreams;
+import org.elasticsearch.search.aggregations.InternalAggregation;
+import org.elasticsearch.search.internal.InternalSearchHit;
+import org.elasticsearch.search.internal.InternalSearchHits;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Result of a {@code top_hits} aggregation: the search hits plus the top docs, sort and size used to merge results.
+ */
+public class InternalTopHits extends InternalAggregation implements TopHits, ToXContent, Streamable {
+
+    public static final InternalAggregation.Type TYPE = new Type("top_hits");
+
+    public static final AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
+        @Override
+        public InternalTopHits readResult(StreamInput in) throws IOException {
+            InternalTopHits topHits = new InternalTopHits();
+            topHits.readFrom(in);
+            return topHits;
+        }
+    };
+
+    public static void registerStreams() {
+        AggregationStreams.registerStream(STREAM, TYPE.stream());
+    }
+
+    private int size;
+    private Sort sort;
+    private TopDocs topDocs;
+    private InternalSearchHits searchHits;
+
+    InternalTopHits() {
+    }
+
+    public InternalTopHits(String name, int size, Sort sort, TopDocs topDocs, InternalSearchHits searchHits) {
+        this.name = name;
+        this.searchHits = searchHits;
+        this.topDocs = topDocs;
+        this.sort = sort;
+        this.size = size;
+    }
+
+    public InternalTopHits(String name, InternalSearchHits searchHits) {
+        this(name, 0, null, new TopDocs(0, Lucene.EMPTY_SCORE_DOCS, 0), searchHits);
+    }
+
+    @Override
+    public Type type() {
+        return TYPE;
+    }
+
+    @Override
+    public SearchHits getHits() {
+        return searchHits;
+    }
+
+    /** Returns a lone result unchanged; otherwise merges the top docs of all results using this instance's sort and size. */
+    @Override
+    public InternalAggregation reduce(ReduceContext reduceContext) {
+        final List<InternalAggregation> shardResults = reduceContext.aggregations();
+        final int shardCount = shardResults.size();
+        if (shardCount == 1) {
+            return shardResults.get(0);
+        }
+        final TopDocs[] docsPerShard = new TopDocs[shardCount];
+        final InternalSearchHits[] hitsPerShard = new InternalSearchHits[shardCount];
+        for (int shard = 0; shard < shardCount; shard++) {
+            final InternalTopHits shardTopHits = (InternalTopHits) shardResults.get(shard);
+            docsPerShard[shard] = shardTopHits.topDocs;
+            hitsPerShard[shard] = shardTopHits.searchHits;
+        }
+        // Cursor per source result: hits are taken in order (assumes they are ordered like that result's top docs).
+        final int[] nextHit = new int[shardCount];
+        try {
+            final TopDocs merged = TopDocs.merge(sort, size, docsPerShard);
+            final InternalSearchHit[] mergedHits = new InternalSearchHit[merged.scoreDocs.length];
+            int position = 0;
+            for (ScoreDoc doc : merged.scoreDocs) {
+                final InternalSearchHit hit = (InternalSearchHit) hitsPerShard[doc.shardIndex].getAt(nextHit[doc.shardIndex]++);
+                if (doc instanceof FieldDoc) {
+                    hit.sortValues(((FieldDoc) doc).fields);
+                }
+                mergedHits[position++] = hit;
+            }
+            return new InternalTopHits(name, new InternalSearchHits(mergedHits, merged.totalHits, merged.getMaxScore()));
+        } catch (IOException e) {
+            throw ExceptionsHelper.convertToElastic(e);
+        }
+    }
+
+    @Override
+    public void readFrom(StreamInput in) throws IOException {
+        name = in.readString();
+        size = in.readVInt();
+        topDocs = Lucene.readTopDocs(in);
+        // The sort is not written separately; it is rebuilt from the sort fields carried by a TopFieldDocs.
+        if (topDocs instanceof TopFieldDocs) {
+            sort = new Sort(((TopFieldDocs) topDocs).fields);
+        }
+        searchHits = InternalSearchHits.readSearchHits(in);
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeString(name);
+        out.writeVInt(size);
+        Lucene.writeTopDocs(out, topDocs, 0);
+        searchHits.writeTo(out);
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject(name);
+        searchHits.toXContent(builder, params);
+        builder.endObject();
+        return builder;
+    }
+}
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHits.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/tophits/TopHits.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.aggregations.bucket.tophits;
+
+import org.elasticsearch.search.SearchHits;
+import org.elasticsearch.search.aggregations.Aggregation;
+
+/**
+ * A {@code top_hits} aggregation result that exposes its search hits.
+ */
+public interface TopHits extends Aggregation {
+    /** @return the search hits of this aggregation result */
+    SearchHits getHits();
+}