Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add doc values support to boolean fields. #7961

Merged
merged 1 commit into from Apr 2, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion dev-tools/create-bwc-index.py
Expand Up @@ -61,7 +61,8 @@ def index_documents(es, index_name, type, num_docs):
for id in range(0, num_docs):
es.index(index=index_name, doc_type=type, id=id, body={'string': str(random.randint(0, 100)),
'long_sort': random.randint(0, 100),
'double_sort' : float(random.randint(0, 100))})
'double_sort' : float(random.randint(0, 100)),
'bool' : random.choice([True, False])})
if rarely():
es.indices.refresh(index=index_name)
if rarely():
Expand Down
32 changes: 32 additions & 0 deletions docs/reference/migration/migrate_2_0.asciidoc
Expand Up @@ -273,6 +273,38 @@ to provide special features. They now have limited configuration options.
* `_field_names` configuration is limited to disabling the field.
* `_size` configuration is limited to enabling the field.

=== Boolean fields

Boolean fields used to have a string fielddata with `F` meaning `false` and `T`
meaning `true`. They have been refactored to use numeric fielddata, with `0`
for `false` and `1` for `true`. As a consequence, the format of the responses of
the following APIs changed when applied to boolean fields: `0`/`1` is returned
instead of `F`/`T`:

- <<search-request-fielddata-fields,fielddata fields>>
- <<search-request-sort,sort values>>
- <<search-aggregations-bucket-terms-aggregation,terms aggregations>>

In addition, terms aggregations use a custom formatter for boolean fields (as
they already do for dates and IP addresses, which are also backed by numbers)
in order to return the user-friendly representation of boolean values,
`false`/`true`, in `key_as_string`:

[source,json]
---------------
"buckets": [
{
"key": 0,
"key_as_string": "false",
"doc_count": 42
},
{
"key": 1,
"key_as_string": "true",
"doc_count": 12
}
]
---------------

=== Codecs

It is no longer possible to specify per-field postings and doc values formats
Expand Down
Expand Up @@ -646,6 +646,25 @@ public XContentBuilder field(XContentBuilderString name, Iterable value) throws
return this;
}

/**
 * Writes a named array field whose elements are the given boolean values,
 * emitted in the order supplied.
 *
 * @param name  the field name handed to {@code startArray}
 * @param value the booleans to write as the array's elements
 * @return this builder, so calls can be chained
 * @throws IOException propagated from the underlying start/value/end calls
 */
public XContentBuilder field(String name, boolean... value) throws IOException {
    startArray(name);
    for (int i = 0; i < value.length; i++) {
        value(value[i]);
    }
    endArray();
    return this;
}


/**
 * Writes a named array field whose elements are the given boolean values,
 * emitted in the order supplied. Same as the {@code String}-named overload,
 * but takes the field name as an {@code XContentBuilderString}.
 *
 * @param name  the field name handed to {@code startArray}
 * @param value the booleans to write as the array's elements
 * @return this builder, so calls can be chained
 * @throws IOException propagated from the underlying start/value/end calls
 */
public XContentBuilder field(XContentBuilderString name, boolean... value) throws IOException {
    startArray(name);
    for (int i = 0; i < value.length; i++) {
        value(value[i]);
    }
    endArray();
    return this;
}

public XContentBuilder field(String name, String... value) throws IOException {
startArray(name);
for (String o : value) {
Expand Down
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.plain.*;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.core.BooleanFieldMapper;
import org.elasticsearch.index.mapper.internal.IndexFieldMapper;
import org.elasticsearch.index.mapper.internal.ParentFieldMapper;
import org.elasticsearch.index.IndexService;
Expand Down Expand Up @@ -78,6 +79,7 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.put(ParentFieldMapper.NAME, new ParentChildIndexFieldData.Builder())
.put(IndexFieldMapper.NAME, new IndexIndexFieldData.Builder())
.put("binary", new DisabledIndexFieldData.Builder())
.put(BooleanFieldMapper.CONTENT_TYPE, new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BOOLEAN))
.immutableMap();

docValuesBuildersByType = MapBuilder.<String, IndexFieldData.Builder>newMapBuilder()
Expand All @@ -90,6 +92,7 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.put("long", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.LONG))
.put("geo_point", new GeoPointBinaryDVIndexFieldData.Builder())
.put("binary", new BytesBinaryDVIndexFieldData.Builder())
.put(BooleanFieldMapper.CONTENT_TYPE, new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.BOOLEAN))
.immutableMap();

buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
Expand Down Expand Up @@ -130,6 +133,10 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.put(Tuple.tuple("binary", DOC_VALUES_FORMAT), new BytesBinaryDVIndexFieldData.Builder())
.put(Tuple.tuple("binary", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple(BooleanFieldMapper.CONTENT_TYPE, ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BOOLEAN))
.put(Tuple.tuple(BooleanFieldMapper.CONTENT_TYPE, DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.BOOLEAN))
.put(Tuple.tuple(BooleanFieldMapper.CONTENT_TYPE, DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.immutableMap();
}

Expand Down
Expand Up @@ -24,13 +24,38 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.core.BooleanFieldMapper;

/**
*/
public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericFieldData> {

public static enum NumericType {
BOOLEAN(1, false, SortField.Type.INT, 0, 1) {
    /**
     * Decodes an indexed boolean term into its numeric form: {@code 0} when the
     * term equals {@link BooleanFieldMapper.Values#FALSE}, {@code 1} when it
     * equals {@link BooleanFieldMapper.Values#TRUE}.
     *
     * @throws ElasticsearchIllegalArgumentException if the term is neither of the two
     */
    @Override
    public long toLong(BytesRef indexForm) {
        if (indexForm.equals(BooleanFieldMapper.Values.FALSE)) {
            return 0;
        }
        if (indexForm.equals(BooleanFieldMapper.Values.TRUE)) {
            return 1;
        }
        throw new ElasticsearchIllegalArgumentException("Cannot convert " + indexForm + " to a boolean");
    }

    /**
     * Encodes a number as an indexed boolean term: any non-zero int value maps
     * to the TRUE term, zero maps to the FALSE term.
     */
    @Override
    public void toIndexForm(Number number, BytesRefBuilder bytes) {
        // NOTE(review): this appends to whatever is already in the builder; it does not clear it first.
        if (number.intValue() != 0) {
            bytes.append(BooleanFieldMapper.Values.TRUE);
        } else {
            bytes.append(BooleanFieldMapper.Values.FALSE);
        }
    }

    /**
     * Same decoding as {@link #toLong(BytesRef)}, returned as a boxed {@code Long}.
     */
    @Override
    public Number toNumber(BytesRef indexForm) {
        final long decoded = toLong(indexForm);
        return Long.valueOf(decoded);
    }

},
BYTE(8, false, SortField.Type.INT, Byte.MIN_VALUE, Byte.MAX_VALUE) {
@Override
public long toLong(BytesRef indexForm) {
Expand Down Expand Up @@ -174,7 +199,9 @@ public double toDouble(BytesRef indexForm) {
public abstract Number toNumber(BytesRef indexForm);

public final TermsEnum wrapTermsEnum(TermsEnum termsEnum) {
if (requiredBits() > 32) {
if (requiredBits() == 1) { // boolean, no prefix-terms
return termsEnum;
} else if (requiredBits() > 32) {
return OrdinalsBuilder.wrapNumeric64Bit(termsEnum);
} else {
return OrdinalsBuilder.wrapNumeric32Bit(termsEnum);
Expand Down
Expand Up @@ -99,7 +99,7 @@ public PackedArrayIndexFieldData(Index index, @IndexSettings Settings indexSetti
CircuitBreakerService breakerService) {
super(index, indexSettings, fieldNames, fieldDataType, cache);
Preconditions.checkNotNull(numericType);
Preconditions.checkArgument(EnumSet.of(NumericType.BYTE, NumericType.SHORT, NumericType.INT, NumericType.LONG).contains(numericType), getClass().getSimpleName() + " only supports integer types, not " + numericType);
Preconditions.checkArgument(EnumSet.of(NumericType.BOOLEAN, NumericType.BYTE, NumericType.SHORT, NumericType.INT, NumericType.LONG).contains(numericType), getClass().getSimpleName() + " only supports integer types, not " + numericType);
this.numericType = numericType;
this.breakerService = breakerService;
}
Expand Down Expand Up @@ -127,16 +127,13 @@ public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Excep

final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
TermsEnum termsEnum = estimator.beforeLoad(terms);
assert !getNumericType().isFloatingPoint();
boolean success = false;
try (OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) {
BytesRefIterator iter = builder.buildFromTerms(termsEnum);
BytesRef term;
assert !getNumericType().isFloatingPoint();
final boolean indexedAsLong = getNumericType().requiredBits() > 32;
while ((term = iter.next()) != null) {
final long value = indexedAsLong
? NumericUtils.prefixCodedToLong(term)
: NumericUtils.prefixCodedToInt(term);
final long value = numericType.toLong(term);
valuesBuilder.add(value);
}
final PackedLongValues values = valuesBuilder.build();
Expand Down
Expand Up @@ -21,6 +21,7 @@

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.Filter;
Expand Down Expand Up @@ -51,9 +52,8 @@
import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;

/**
*
* A field mapper for boolean fields.
*/
// TODO this can be made better, maybe storing a byte for it?
public class BooleanFieldMapper extends AbstractFieldMapper<Boolean> {

public static final String CONTENT_TYPE = "boolean";
Expand Down Expand Up @@ -100,7 +100,7 @@ public Builder tokenized(boolean tokenized) {

@Override
public BooleanFieldMapper build(BuilderContext context) {
return new BooleanFieldMapper(buildNames(context), boost, fieldType, nullValue,
return new BooleanFieldMapper(buildNames(context), boost, fieldType, docValues, nullValue,
similarity, normsLoading, fieldDataSettings, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
}
}
Expand Down Expand Up @@ -128,10 +128,10 @@ public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext

private Boolean nullValue;

protected BooleanFieldMapper(Names names, float boost, FieldType fieldType, Boolean nullValue,
protected BooleanFieldMapper(Names names, float boost, FieldType fieldType, Boolean docValues, Boolean nullValue,
SimilarityProvider similarity, Loading normsLoading,
@Nullable Settings fieldDataSettings, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
super(names, boost, fieldType, false, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, similarity, normsLoading, fieldDataSettings, indexSettings, multiFields, copyTo);
super(names, boost, fieldType, docValues, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER, similarity, normsLoading, fieldDataSettings, indexSettings, multiFields, copyTo);
this.nullValue = nullValue;
}

Expand All @@ -143,7 +143,7 @@ public FieldType defaultFieldType() {
@Override
public FieldDataType defaultFieldDataType() {
// TODO have a special boolean type?
return new FieldDataType("string");
return new FieldDataType(CONTENT_TYPE);
}

@Override
Expand Down Expand Up @@ -210,7 +210,7 @@ public Filter nullValueFilter() {

@Override
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
if (fieldType().indexOptions() == IndexOptions.NONE && !fieldType().stored()) {
if (fieldType().indexOptions() == IndexOptions.NONE && !fieldType().stored() && !hasDocValues()) {
return;
}

Expand All @@ -230,6 +230,9 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
return;
}
fields.add(new Field(names.indexName(), value ? "T" : "F", fieldType));
if (hasDocValues()) {
fields.add(new SortedNumericDocValuesField(names.indexName(), value ? 1 : 0));
}
}

@Override
Expand Down
Expand Up @@ -25,6 +25,7 @@
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.core.BooleanFieldMapper;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
import org.elasticsearch.index.mapper.ip.IpFieldMapper;
Expand Down Expand Up @@ -209,6 +210,9 @@ private static ValueFormat resolveFormat(@Nullable String format, FieldMapper ma
if (mapper instanceof IpFieldMapper) {
return ValueFormat.IPv4;
}
if (mapper instanceof BooleanFieldMapper) {
return ValueFormat.BOOLEAN;
}
if (mapper instanceof NumberFieldMapper) {
return format != null ? ValueFormat.Number.format(format) : ValueFormat.RAW;
}
Expand Down
Expand Up @@ -28,6 +28,7 @@ public class ValueFormat {

public static final ValueFormat RAW = new ValueFormat(ValueFormatter.RAW, ValueParser.RAW);
public static final ValueFormat IPv4 = new ValueFormat(ValueFormatter.IPv4, ValueParser.IPv4);
public static final ValueFormat BOOLEAN = new ValueFormat(ValueFormatter.BOOLEAN, ValueParser.BOOLEAN);

private final ValueFormatter formatter;
private final ValueParser parser;
Expand Down
Expand Up @@ -19,8 +19,6 @@
package org.elasticsearch.search.aggregations.support.format;

import org.elasticsearch.common.geo.GeoHashUtils;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoUtils;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
Expand All @@ -45,6 +43,7 @@ public interface ValueFormatter extends Streamable {
public final static ValueFormatter RAW = new Raw();
public final static ValueFormatter IPv4 = new IPv4Formatter();
public final static ValueFormatter GEOHASH = new GeoHash();
public final static ValueFormatter BOOLEAN = new BooleanFormatter();

/**
* Uniquely identifies this formatter (used for efficient serialization)
Expand Down Expand Up @@ -266,4 +265,31 @@ public void writeTo(StreamOutput out) throws IOException {
}
}

/**
 * Formatter that renders a numeric value as the text {@code "false"} or
 * {@code "true"}: zero is formatted as "false", every other value as "true".
 * It carries no state, so the stream read/write hooks are no-ops.
 */
static class BooleanFormatter implements ValueFormatter {

    /** Identifier returned by {@link #id()}. */
    static final byte ID = 10;

    @Override
    public byte id() {
        return ID;
    }

    /** Returns "true" for any non-zero {@code value}, "false" for zero. */
    @Override
    public String format(long value) {
        final boolean nonZero = value != 0;
        return String.valueOf(nonZero);
    }

    /** Returns "true" for any {@code value} that is not equal to zero, "false" otherwise. */
    @Override
    public String format(double value) {
        final boolean nonZero = value != 0;
        return String.valueOf(nonZero);
    }

    @Override
    public void readFrom(StreamInput in) throws IOException {
        // nothing to read: this formatter has no fields
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        // nothing to write: this formatter has no fields
    }
}
}
Expand Up @@ -38,6 +38,7 @@ public static ValueFormatter read(StreamInput in) throws IOException {
case ValueFormatter.DateTime.ID: formatter = new ValueFormatter.DateTime(); break;
case ValueFormatter.Number.Pattern.ID: formatter = new ValueFormatter.Number.Pattern(); break;
case ValueFormatter.GeoHash.ID: formatter = ValueFormatter.GEOHASH; break;
case ValueFormatter.BooleanFormatter.ID: formatter = ValueFormatter.BOOLEAN; break;
default: throw new ElasticsearchIllegalArgumentException("Unknown value formatter with id [" + id + "]");
}
formatter.readFrom(in);
Expand Down
Expand Up @@ -41,6 +41,7 @@ public interface ValueParser {

static final ValueParser IPv4 = new IPv4();
static final ValueParser RAW = new Raw();
static final ValueParser BOOLEAN = new Boolean();

long parseLong(String value, SearchContext searchContext);

Expand Down Expand Up @@ -184,4 +185,20 @@ public double parseDouble(String value, SearchContext searchContext) {
}
}

/**
 * Parser that turns a textual boolean into its numeric form: {@code 1} when
 * {@link java.lang.Boolean#parseBoolean(String)} accepts the text as true,
 * {@code 0} otherwise.
 *
 * NOTE(review): {@code parseBoolean} only recognises "true" (ignoring case), so
 * numeric text such as "1" parses to 0 - confirm that is intended for callers.
 */
static class Boolean implements ValueParser {

    private Boolean() {
    }

    @Override
    public long parseLong(String value, SearchContext searchContext) {
        // Fully qualified because this nested class is itself named Boolean.
        if (java.lang.Boolean.parseBoolean(value)) {
            return 1;
        }
        return 0;
    }

    /** Delegates to {@link #parseLong(String, SearchContext)} and widens the result. */
    @Override
    public double parseDouble(String value, SearchContext searchContext) {
        final long parsed = parseLong(value, searchContext);
        return parsed;
    }
}

}