Added extended_bounds support for date_/histogram aggs #5444

Merged
merged 1 commit into from Mar 20, 2014
@@ -131,6 +131,8 @@ Response:
--------------------------------------------------

Like with the normal <<search-aggregations-bucket-histogram-aggregation,histogram>>, both document level scripts and
value level scripts are supported. It is also possible to control the order of the returned buckets using the `order`
setting and to filter the returned buckets based on a `min_doc_count` setting (by default all buckets with
`min_doc_count > 0` will be returned). This histogram also supports the `extended_bounds` setting, which enables extending
the bounds of the histogram beyond the data itself (to read more on why you'd want to do that please refer to the
explanation <<search-aggregations-bucket-histogram-aggregation-extended-bounds,here>>).
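On a date histogram the bounds can also be given as date strings, which go through the same date parsing as the field values. A minimal request sketch (the field name, interval, and dates here are illustrative, not taken from this PR):

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "articles_over_time" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "month",
                "min_doc_count" : 0,
                "extended_bounds" : {
                    "min" : "2014-01-01",
                    "max" : "2014-12-31"
                }
            }
        }
    }
}
--------------------------------------------------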
@@ -112,6 +112,54 @@ Response:

<1> No documents were found that belong in this bucket, yet it is still returned with zero `doc_count`.

[[search-aggregations-bucket-histogram-aggregation-extended-bounds]]
By default the date_/histogram returns all the buckets within the range of the data itself, that is, the documents with
the smallest values (on which the histogram runs) will determine the min bucket (the bucket with the smallest key) and the
documents with the highest values will determine the max bucket (the bucket with the highest key). Often, when
requesting empty buckets (`"min_doc_count" : 0`), this causes confusion, specifically when the data is also filtered.

To understand why, let's look at an example:

Let's say you're filtering your request to get all docs with values between `0` and `500`, and in addition you'd like
to slice the data per price using a histogram with an interval of `50`. You also specify `"min_doc_count" : 0` as you'd
like to get all buckets, even the empty ones. If it happens that all products (documents) have prices higher than `100`,
the first bucket you'll get will be the one with `100` as its key. This is confusing, as many times you'd also like
to get the buckets between `0 - 100`.

With the `extended_bounds` setting, you can now "force" the histogram aggregation to start building buckets at a specific
`min` value and keep on building buckets up to a `max` value (even if there are no documents anymore). Using
`extended_bounds` only makes sense when `min_doc_count` is 0 (the empty buckets will never be returned if `min_doc_count`
is greater than 0).

Note that (as the name suggests) `extended_bounds` is **not** filtering buckets. Meaning, if `extended_bounds.min` is higher
than the values extracted from the documents, the documents will still dictate what the first bucket will be (and the
same goes for the `extended_bounds.max` and the last bucket). For filtering buckets, one should nest the histogram aggregation
under a range `filter` aggregation with the appropriate `from`/`to` settings.
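For that filtering case, a nested sketch (the aggregation names and field are illustrative) might look like:

[source,js]
--------------------------------------------------
{
    "aggs" : {
        "price_range" : {
            "filter" : { "range" : { "price" : { "from" : 0, "to" : 500 } } },
            "aggs" : {
                "prices" : {
                    "histogram" : {
                        "field" : "price",
                        "interval" : 50
                    }
                }
            }
        }
    }
}
--------------------------------------------------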

Example:

[source,js]
--------------------------------------------------
{
    "query" : {
        "filtered" : { "filter" : { "range" : { "price" : { "to" : "500" } } } }
    },
    "aggs" : {
        "prices" : {
            "histogram" : {
                "field" : "price",
                "interval" : 50,
                "min_doc_count" : 0,
                "extended_bounds" : {
                    "min" : 0,
                    "max" : 500
                }
            }
        }
    }
}
--------------------------------------------------
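The behaviour described above (bounds only widen the bucket range and never filter it, while the data still dictates the edge buckets when it falls outside the bounds) can be sketched standalone. This is an illustrative model with hypothetical names, not the Elasticsearch implementation, and it assumes at least one document:

```java
import java.util.SortedMap;
import java.util.TreeMap;

// Illustrative sketch of histogram bucketing with extended bounds
// (not Elasticsearch code; names are hypothetical).
class ExtendedBoundsSketch {

    // Returns bucket key -> doc count, keeping empty buckets (min_doc_count == 0).
    // Assumes values is non-empty.
    static SortedMap<Long, Integer> histogram(long[] values, long interval,
                                              Long boundMin, Long boundMax) {
        SortedMap<Long, Integer> buckets = new TreeMap<>();
        for (long v : values) {
            // bucket key: value rounded down to a multiple of the interval
            buckets.merge(Math.floorDiv(v, interval) * interval, 1, Integer::sum);
        }
        long lo = buckets.firstKey();
        long hi = buckets.lastKey();
        // extended_bounds only widens the range -- it never drops buckets, so the
        // data still dictates the first/last bucket when it lies outside the bounds
        if (boundMin != null) lo = Math.min(lo, Math.floorDiv(boundMin, interval) * interval);
        if (boundMax != null) hi = Math.max(hi, Math.floorDiv(boundMax, interval) * interval);
        for (long key = lo; key <= hi; key += interval) {
            buckets.putIfAbsent(key, 0); // materialize empty buckets
        }
        return buckets;
    }
}
```

With prices `{120, 160, 410}`, an interval of `50` and bounds `0`/`500`, this yields the eleven buckets `0, 50, ..., 500`, all empty except `100`, `150` and `400`.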

==== Order

By default the returned buckets are sorted by their `key` ascending, though the order behaviour can be controlled
@@ -23,6 +23,7 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.ValuesSourceAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilderException;
import org.joda.time.DateTime;

import java.io.IOException;

@@ -34,6 +35,8 @@ public class DateHistogramBuilder extends ValuesSourceAggregationBuilder<DateHis
    private Object interval;
    private Histogram.Order order;
    private Long minDocCount;
    private Object extendedBoundsMin;
    private Object extendedBoundsMax;
    private String preZone;
    private String postZone;
    private boolean preZoneAdjustLargeInterval;
@@ -101,6 +104,24 @@ public DateHistogramBuilder format(String format) {
        return this;
    }

    public DateHistogramBuilder extendedBounds(Long min, Long max) {
        extendedBoundsMin = min;
        extendedBoundsMax = max;
        return this;
    }

    public DateHistogramBuilder extendedBounds(String min, String max) {
        extendedBoundsMin = min;
        extendedBoundsMax = max;
        return this;
    }

    public DateHistogramBuilder extendedBounds(DateTime min, DateTime max) {
        extendedBoundsMin = min;
        extendedBoundsMax = max;
        return this;
    }

    @Override
    protected XContentBuilder doInternalXContent(XContentBuilder builder, Params params) throws IOException {
        if (interval == null) {
@@ -148,6 +169,17 @@ protected XContentBuilder doInternalXContent(XContentBuilder builder, Params par
            builder.field("format", format);
        }

        if (extendedBoundsMin != null || extendedBoundsMax != null) {
            builder.startObject(DateHistogramParser.EXTENDED_BOUNDS.getPreferredName());
            if (extendedBoundsMin != null) {
                builder.field("min", extendedBoundsMin);
            }
            if (extendedBoundsMax != null) {
                builder.field("max", extendedBoundsMax);
            }
            builder.endObject();
        }

        return builder;
    }

@@ -19,6 +19,7 @@
package org.elasticsearch.search.aggregations.bucket.histogram;

import com.google.common.collect.ImmutableMap;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.joda.DateMathParser;
import org.elasticsearch.common.rounding.DateTimeUnit;
@@ -48,6 +49,8 @@
 */
public class DateHistogramParser implements Aggregator.Parser {

    static final ParseField EXTENDED_BOUNDS = new ParseField("extended_bounds");

    private final ImmutableMap<String, DateTimeUnit> dateFieldUnits;

    public DateHistogramParser() {
@@ -87,6 +90,7 @@ public AggregatorFactory parse(String aggregationName, XContentParser parser, Se
        Map<String, Object> scriptParams = null;
        boolean keyed = false;
        long minDocCount = 1;
        ExtendedBounds extendedBounds = null;
        InternalOrder order = (InternalOrder) Histogram.Order.KEY_ASC;
        String interval = null;
        boolean preZoneAdjustLargeInterval = false;
@@ -162,6 +166,32 @@ public AggregatorFactory parse(String aggregationName, XContentParser parser, Se
                        //TODO should we throw an error if the value is not "asc" or "desc"???
                    }
                }
            } else if (EXTENDED_BOUNDS.match(currentFieldName)) {
                extendedBounds = new ExtendedBounds();
                while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                    if (token == XContentParser.Token.FIELD_NAME) {
                        currentFieldName = parser.currentName();
                    } else if (token == XContentParser.Token.VALUE_STRING) {
                        if ("min".equals(currentFieldName)) {
                            extendedBounds.minAsStr = parser.text();
                        } else if ("max".equals(currentFieldName)) {
                            extendedBounds.maxAsStr = parser.text();
                        } else {
                            throw new SearchParseException(context, "Unknown extended_bounds key for a " + token + " in aggregation [" + aggregationName + "]: [" + currentFieldName + "].");
                        }
                    } else if (token == XContentParser.Token.VALUE_NUMBER) {
                        if ("min".equals(currentFieldName)) {
                            extendedBounds.min = parser.longValue();
                        } else if ("max".equals(currentFieldName)) {
                            extendedBounds.max = parser.longValue();
                        } else {
                            throw new SearchParseException(context, "Unknown extended_bounds key for a " + token + " in aggregation [" + aggregationName + "]: [" + currentFieldName + "].");
                        }
                    } else {
                        throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
                    }
                }

            } else {
                throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "].");
            }
@@ -209,17 +239,21 @@ public AggregatorFactory parse(String aggregationName, XContentParser parser, Se
            if (searchScript != null) {
                ValueParser valueParser = new ValueParser.DateMath(new DateMathParser(DateFieldMapper.Defaults.DATE_TIME_FORMATTER, DateFieldMapper.Defaults.TIME_UNIT));
                config.parser(valueParser);
                return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, extendedBounds, InternalDateHistogram.FACTORY);
            }

            // falling back on the get field data context
            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, extendedBounds, InternalDateHistogram.FACTORY);
        }

        FieldMapper<?> mapper = context.smartNameFieldMapper(field);
        if (mapper == null) {
            config.unmapped(true);
            if (format == null) {
                config.formatter(new ValueFormatter.DateTime(DateFieldMapper.Defaults.DATE_TIME_FORMATTER));
            }
            config.parser(new ValueParser.DateMath(new DateMathParser(DateFieldMapper.Defaults.DATE_TIME_FORMATTER, DateFieldMapper.Defaults.TIME_UNIT)));
            return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, extendedBounds, InternalDateHistogram.FACTORY);
        }

        if (!(mapper instanceof DateFieldMapper)) {
@@ -228,7 +262,11 @@ public AggregatorFactory parse(String aggregationName, XContentParser parser, Se

        IndexFieldData<?> indexFieldData = context.fieldData().getForField(mapper);
        config.fieldContext(new FieldContext(field, indexFieldData));
        if (format == null) {
            config.formatter(new ValueFormatter.DateTime(((DateFieldMapper) mapper).dateTimeFormatter()));
        }
        config.parser(new ValueParser.DateMath(new DateMathParser(((DateFieldMapper) mapper).dateTimeFormatter(), DateFieldMapper.Defaults.TIME_UNIT)));
        return new HistogramAggregator.Factory(aggregationName, config, rounding, order, keyed, minDocCount, extendedBounds, InternalDateHistogram.FACTORY);
    }

    private static InternalOrder resolveOrder(String key, boolean asc) {
@@ -0,0 +1,91 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.aggregations.bucket.histogram;

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.rounding.Rounding;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.aggregations.support.numeric.ValueParser;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;

/**
*
*/
public class ExtendedBounds {

    Long min;
    Long max;

    String minAsStr;
    String maxAsStr;

    ExtendedBounds() {} // for serialization

    ExtendedBounds(Long min, Long max) {
        this.min = min;
        this.max = max;
    }

    void processAndValidate(String aggName, SearchContext context, ValueParser parser) {
        if (minAsStr != null) {
            min = parser != null ? parser.parseLong(minAsStr, context) : Long.parseLong(minAsStr);
        }
        if (maxAsStr != null) {
            max = parser != null ? parser.parseLong(maxAsStr, context) : Long.parseLong(maxAsStr);
        }
        if (min != null && max != null && min.compareTo(max) > 0) {
            throw new SearchParseException(context, "[extended_bounds.min][" + min + "] cannot be greater than " +
                    "[extended_bounds.max][" + max + "] for histogram aggregation [" + aggName + "]");
        }
    }

    ExtendedBounds round(Rounding rounding) {
        return new ExtendedBounds(min != null ? rounding.round(min) : null, max != null ? rounding.round(max) : null);
    }

    void writeTo(StreamOutput out) throws IOException {
        if (min != null) {
            out.writeBoolean(true);
            out.writeLong(min);
        } else {
            out.writeBoolean(false);
        }
        if (max != null) {
            out.writeBoolean(true);
            out.writeLong(max);
        } else {
            out.writeBoolean(false);
        }
    }

    static ExtendedBounds readFrom(StreamInput in) throws IOException {
        ExtendedBounds bounds = new ExtendedBounds();
        if (in.readBoolean()) {
            bounds.min = in.readLong();
        }
        if (in.readBoolean()) {
            bounds.max = in.readLong();
        }
        return bounds;
    }
}
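The validate-then-round flow of the class above can be sketched standalone, with a fixed numeric interval standing in for Elasticsearch's `Rounding`. The names here are hypothetical, not this PR's API:

```java
// Hypothetical, simplified model of ExtendedBounds: bounds validated on
// construction, then snapped onto bucket keys by round().
class BoundsSketch {
    final Long min;
    final Long max;

    BoundsSketch(Long min, Long max) {
        // mirrors processAndValidate(): min must not exceed max
        if (min != null && max != null && min > max) {
            throw new IllegalArgumentException(
                    "extended_bounds.min [" + min + "] cannot be greater than extended_bounds.max [" + max + "]");
        }
        this.min = min;
        this.max = max;
    }

    // Interval rounding: floor each bound to the nearest multiple of the
    // interval, as round(Rounding) does for a plain numeric histogram.
    BoundsSketch round(long interval) {
        return new BoundsSketch(
                min == null ? null : Math.floorDiv(min, interval) * interval,
                max == null ? null : Math.floorDiv(max, interval) * interval);
    }
}
```

For example, bounds of `7`/`123` rounded on an interval of `50` become the bucket keys `0` and `100`, which is why the histogram can start and end exactly on bucket boundaries.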