Skip to content

Commit

Permalink
field stats: added stats filtering option
Browse files Browse the repository at this point in the history
Field stats filtering allows filtering out field stats results for indices that have no field values
in the defined range. This is useful, for example, to find all indices that have logs within a
certain date range. This option is only useful if the `level` option is set to `indices`.

The following request only returns field stats for indices that have `_timestamp` date values within
the defined range. The response format is the same as when no field stats filtering is enabled.

curl -XPOST 'http://localhost:9200/_field_stats?level=indices' -d '{
   "fields" : { <1>
      "_timestamp" : { <2>
         "gte" : "2014-01-01T00:00:00.000Z",
         "lt" : "2015-01-01T00:00:00.000Z"
      }
   }
}'

Closes #11187
  • Loading branch information
martijnvg committed May 20, 2015
1 parent a40ba3b commit 78745f3
Show file tree
Hide file tree
Showing 13 changed files with 667 additions and 13 deletions.
49 changes: 48 additions & 1 deletion docs/reference/search/field-stats.asciidoc
Expand Up @@ -201,4 +201,51 @@ curl -XGET "http://localhost:9200/_field_stats?fields=rating,answer_count,creati
}
--------------------------------------------------

<1> The `stack` key means it contains all field stats for the `stack` index.
<1> The `stack` key means it contains all field stats for the `stack` index.

[float]
=== Field stats filtering

Field stats filtering allows filtering out field stats results for indices that have no field values
in the defined range. This is useful, for example, to find all indices that have logs within a
certain date range. This option is only useful if the `level` option is set to `indices`.

The following request only returns field stats for indices that have `_timestamp` date values within
the defined range. The response format is the same as when no field stats filtering is enabled.

[source,js]
--------------------------------------------------
curl -XPOST 'http://localhost:9200/_field_stats?level=indices' -d '{
"fields" : { <1>
"_timestamp" : { <2>
"gte" : "2014-01-01T00:00:00.000Z",
"lt" : "2015-01-01T00:00:00.000Z"
}
}
}'
--------------------------------------------------

<1> The top level `fields` object contains the field definitions for the fields to retrieve stats for.
<2> The field definition for the `_timestamp` field with a range.

Each field definition object supports the following options to define a range:

[horizontal]
`gte`:: Greater-than or equal to
`gt`:: Greater-than
`lte`:: Less-than or equal to
`lt`:: Less-than

All the range options are optional. As can be seen in the following example,
an empty json object per field is valid too:

[source,js]
--------------------------------------------------
curl -XPOST 'http://localhost:9200/_field_stats?level=indices' -d '{
"fields" : {
"_timestamp" : {}
}
}'
--------------------------------------------------

This is equivalent to specifying `_timestamp` in the `fields` query string parameter.
5 changes: 4 additions & 1 deletion rest-api-spec/api/field_stats.json
Expand Up @@ -41,6 +41,9 @@
}
}
},
"body": null
"body": {
"description": "Field json objects containing the name and optionally a range to filter out indices result, that have results outside the defined bounds",
"required": false
}
}
}
24 changes: 24 additions & 0 deletions rest-api-spec/test/field_stats/10_basics.yaml
Expand Up @@ -50,3 +50,27 @@
- match: { indices.test_1.fields.number.doc_count: 1 }
- match: { indices.test_1.fields.number.min_value: 123 }
- match: { indices.test_1.fields.number.max_value: 123 }

---
# Checks that a field_stats request with a `fields` body is accepted and that stats
# for the unconstrained `foo` field are reported for the index.
"Field stats with filtering":
# Index a single document containing one `foo` value and one `number` value.
- do:
index:
index: test_1
type: test
id: id_1
body: { foo: "bar", number: 123 }

# Refresh the indices before requesting stats.
- do:
indices.refresh: {}

# `foo` carries no range constraint; `number` is constrained to the open range (1, 2),
# which does not contain the indexed value 123.
- do:
field_stats:
level: indices
index: test_1
body: { fields: { foo: {}, number: { gt: 1, lt: 2 } } }

# NOTE(review): only `foo` is asserted below; no assertion on `number` is visible here —
# confirm whether its stats are expected to be absent from the response.
- match: { indices.test_1.fields.foo.max_doc: 1 }
- match: { indices.test_1.fields.foo.doc_count: 1 }
- match: { indices.test_1.fields.foo.min_value: "bar" }
- match: { indices.test_1.fields.foo.max_value: "bar" }

Expand Up @@ -20,6 +20,8 @@
package org.elasticsearch.action.fieldstats;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.ValidateActions;
import org.elasticsearch.bootstrap.Elasticsearch;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
Expand All @@ -31,7 +33,7 @@

import java.io.IOException;

public abstract class FieldStats<T> implements Streamable, ToXContent {
public abstract class FieldStats<T extends Comparable<T>> implements Streamable, ToXContent {

private byte type;
private long maxDoc;
Expand Down Expand Up @@ -120,6 +122,12 @@ public long getSumTotalTermFreq() {
*/
public abstract String getMaxValue();

/**
 * Parses the string form of a value into this stats' value type, so that it can be
 * compared against the min and max value (see the range matching in this class).
 *
 * @param value The string to be parsed
 * @return The concrete object represented by the string argument
 */
protected abstract T valueOf(String value);

/**
* Merges the provided stats into this stats instance.
*/
Expand All @@ -142,6 +150,39 @@ public void append(FieldStats stats) {
}
}

/**
 * Checks if this {@link FieldStats} instance matches with the provided range. It matches if the range overlaps
 * in any way with the min and max value. A <code>null</code> bound leaves that side of the range open.
 *
 * Both bounds are parsed and validated before the stats are consulted, so an invalid range is rejected
 * regardless of the min and max value of this particular instance.
 *
 * @param lowerValue   The lower value of this range, or <code>null</code> for no lower bound
 * @param includeLower Whether the lowerValue should be included into the range
 * @param upperValue   The upper value of this range, or <code>null</code> for no upper bound
 * @param includeUpper Whether the upperValue should be included into the range
 * @return <code>true</code> if this instance matches with the provided range, otherwise <code>false</code> is returned
 * @throws IllegalArgumentException if both bounds are given and the lower bound is higher than the upper bound
 */
public boolean match(String lowerValue, boolean includeLower, String upperValue, boolean includeUpper) {
    T lower = null;
    if (lowerValue != null) {
        lower = valueOf(lowerValue);
    }
    T upper = null;
    if (upperValue != null) {
        upper = valueOf(upperValue);
    }

    // Validate first: previously this check was skipped whenever the lower bound comparison
    // already returned false, so the same bad request failed for some instances and not others.
    if (lower != null && upper != null && lower.compareTo(upper) > 0) {
        // Report both bounds as the caller supplied them rather than mixing parsed and raw forms.
        throw new IllegalArgumentException("invalid range, lower [" + lowerValue + "] is higher than upper [" + upperValue + "]");
    }

    if (lowerValue != null) {
        int cmp = maxValue.compareTo(lower);
        // No overlap when every value lies below the lower bound, or the max only touches an excluded bound.
        if (cmp < 0 || (cmp == 0 && !includeLower)) {
            return false;
        }
    }
    if (upperValue != null) {
        int cmp = minValue.compareTo(upper);
        // No overlap when every value lies above the upper bound, or the min only touches an excluded bound.
        if (cmp > 0 || (cmp == 0 && !includeUpper)) {
            return false;
        }
    }
    return true;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
Expand Down Expand Up @@ -210,6 +251,11 @@ public void append(FieldStats stats) {
this.maxValue = Math.max(other.maxValue, maxValue);
}

@Override
protected java.lang.Long valueOf(String value) {
    // Decimal parse of the bound; boxing on return yields the same result as Long.valueOf(String).
    final long parsed = java.lang.Long.parseLong(value);
    return parsed;
}

@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
Expand Down Expand Up @@ -255,6 +301,11 @@ public void append(FieldStats stats) {
this.maxValue = Math.max(other.maxValue, maxValue);
}

@Override
protected java.lang.Float valueOf(String value) {
    // Parse the bound as a float; boxing on return yields the same result as Float.valueOf(String).
    final float parsed = java.lang.Float.parseFloat(value);
    return parsed;
}

@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
Expand Down Expand Up @@ -300,6 +351,11 @@ public void append(FieldStats stats) {
this.maxValue = Math.max(other.maxValue, maxValue);
}

@Override
protected java.lang.Double valueOf(String value) {
    // Parse the bound as a double; boxing on return yields the same result as Double.valueOf(String).
    final double parsed = java.lang.Double.parseDouble(value);
    return parsed;
}

@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
Expand Down Expand Up @@ -349,6 +405,11 @@ public void append(FieldStats stats) {
}
}

@Override
protected BytesRef valueOf(String value) {
    // Turn the string bound into a BytesRef so it is comparable with the min and max value.
    final BytesRef converted = new BytesRef(value);
    return converted;
}

@Override
protected void toInnerXContent(XContentBuilder builder) throws IOException {
builder.field(Fields.MIN_VALUE, getMinValue());
Expand Down Expand Up @@ -393,6 +454,11 @@ public String getMaxValue() {
return dateFormatter.printer().print(maxValue);
}

@Override
protected java.lang.Long valueOf(String value) {
    // Parse with the parser of the same dateFormatter whose printer renders the min and max value.
    final long millis = dateFormatter.parser().parseMillis(value);
    return millis;
}

@Override
protected void toInnerXContent(XContentBuilder builder) throws IOException {
builder.field(Fields.MIN_VALUE, getMinValue());
Expand Down

0 comments on commit 78745f3

Please sign in to comment.