Skip to content

Commit

Permalink
Index field names of documents.
Browse files Browse the repository at this point in the history
The `exists` and `missing` filters need to merge postings lists of all existing
terms, which can be very costly, especially on high-cardinality fields. This
commit indexes the field names of a document under `_field_names` and reuses it
to speed up the `exists` and `missing` filters.

This is only enabled for indices that are created on or after Elasticsearch
1.3.0.

Close #5659
  • Loading branch information
jpountz committed Jun 19, 2014
1 parent e2da211 commit 703dbff
Show file tree
Hide file tree
Showing 14 changed files with 507 additions and 8 deletions.
2 changes: 2 additions & 0 deletions docs/reference/mapping/fields.asciidoc
Expand Up @@ -21,6 +21,8 @@ include::fields/boost-field.asciidoc[]

include::fields/parent-field.asciidoc[]

include::fields/field-names-field.asciidoc[]

include::fields/routing-field.asciidoc[]

include::fields/index-field.asciidoc[]
Expand Down
11 changes: 11 additions & 0 deletions docs/reference/mapping/fields/field-names-field.asciidoc
@@ -0,0 +1,11 @@
[[mapping-field-names-field]]
=== `_field_names`

coming[1.3.0]

The `_field_names` field indexes the field names of a document, which can later
be used to search for documents based on the fields that they contain, typically
using the `exists` and `missing` filters.

`_field_names` is indexed by default for indices that have been created on or
after Elasticsearch 1.3.0.
11 changes: 11 additions & 0 deletions src/main/java/org/elasticsearch/Version.java
Expand Up @@ -19,12 +19,14 @@

package org.elasticsearch;

import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.AbstractModule;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.monitor.jvm.JvmInfo;

import java.io.IOException;
Expand Down Expand Up @@ -344,6 +346,15 @@ public static Version fromId(int id) {
}
}

/**
 * Resolve the {@link Version} of Elasticsearch that created an index from the index settings.
 * {@link Version#CURRENT} is passed as the default for a missing
 * {@link IndexMetaData#SETTING_VERSION_CREATED} entry.
 */
public static Version indexCreated(Settings indexSettings) {
    // Settings that carry a UUID belong to an index that has actually been created, so they must
    // also record the creation version; settings without a UUID might simply come from a test.
    assert !(indexSettings.get(IndexMetaData.SETTING_UUID) != null
            && indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null) == null)
            : IndexMetaData.SETTING_VERSION_CREATED + " not set in IndexSettings";
    final Version createdVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
    return createdVersion;
}

/**
 * Write the numeric id of the given version to the stream using {@code writeVInt}.
 */
public static void writeVersion(Version version, StreamOutput out) throws IOException {
    final int versionId = version.id;
    out.writeVInt(versionId);
}
Expand Down
Expand Up @@ -180,6 +180,8 @@ public Builder(String index, @Nullable Settings indexSettings, RootObjectMapper.
this.rootMappers.put(TTLFieldMapper.class, new TTLFieldMapper());
this.rootMappers.put(VersionFieldMapper.class, new VersionFieldMapper());
this.rootMappers.put(ParentFieldMapper.class, new ParentFieldMapper());
// _field_names last so that it can see all other fields
this.rootMappers.put(FieldNamesFieldMapper.class, new FieldNamesFieldMapper(indexSettings));
}

public Builder meta(ImmutableMap<String, Object> meta) {
Expand Down
Expand Up @@ -21,9 +21,7 @@

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.MapBuilder;
Expand Down Expand Up @@ -51,7 +49,6 @@

import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import static org.elasticsearch.index.mapper.MapperBuilders.doc;

Expand Down Expand Up @@ -122,10 +119,9 @@ public DocumentMapperParser(Index index, @IndexSettings Settings indexSettings,
.put(UidFieldMapper.NAME, new UidFieldMapper.TypeParser())
.put(VersionFieldMapper.NAME, new VersionFieldMapper.TypeParser())
.put(IdFieldMapper.NAME, new IdFieldMapper.TypeParser())
.put(FieldNamesFieldMapper.NAME, new FieldNamesFieldMapper.TypeParser())
.immutableMap();
assert indexSettings.get(IndexMetaData.SETTING_UUID) == null // if the UUDI is there the index has actually been created otherwise this might be a test
|| indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null) != null : IndexMetaData.SETTING_VERSION_CREATED + " not set in IndexSettings";
indexVersionCreated = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
indexVersionCreated = Version.indexCreated(indexSettings);
}

public void putTypeParser(String type, Mapper.TypeParser typeParser) {
Expand Down
Expand Up @@ -74,6 +74,10 @@ public static TypeFieldMapper.Builder type() {
return new TypeFieldMapper.Builder();
}

/**
 * Create a fresh builder for the <code>_field_names</code> mapper.
 */
public static FieldNamesFieldMapper.Builder fieldNames() {
    final FieldNamesFieldMapper.Builder fieldNamesBuilder = new FieldNamesFieldMapper.Builder();
    return fieldNamesBuilder;
}

/**
 * Create a fresh {@link IndexFieldMapper.Builder}.
 */
public static IndexFieldMapper.Builder index() {
    final IndexFieldMapper.Builder indexBuilder = new IndexFieldMapper.Builder();
    return indexBuilder;
}
Expand Down
@@ -0,0 +1,248 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.mapper.internal;

import com.google.common.collect.UnmodifiableIterator;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.index.mapper.MapperBuilders.fieldNames;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;

/**
 * A mapper that indexes the field names of a document under <code>_field_names</code>. This mapper is typically useful in order
 * to have fast <code>exists</code> and <code>missing</code> queries/filters.
 *
 * For every field of a document, the full path and each of its dot-separated prefixes are indexed, so that a document
 * containing <code>a.b.c</code> can be found through <code>a</code>, <code>a.b</code> and <code>a.b.c</code>.
 *
 * Added in Elasticsearch 1.3.
 */
public class FieldNamesFieldMapper extends AbstractFieldMapper<String> implements InternalMapper, RootMapper {

    public static final String NAME = "_field_names";

    public static final String CONTENT_TYPE = "_field_names";

    public static class Defaults extends AbstractFieldMapper.Defaults {
        public static final String NAME = FieldNamesFieldMapper.NAME;
        public static final String INDEX_NAME = FieldNamesFieldMapper.NAME;

        /** Default field type: indexed, not tokenized, not stored, norms omitted, docs-only postings. */
        public static final FieldType FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE);
        /** Same as {@link #FIELD_TYPE} but not indexed; used as the default for indices created before 1.3.0. */
        public static final FieldType FIELD_TYPE_PRE_1_3_0;

        static {
            FIELD_TYPE.setIndexed(true);
            FIELD_TYPE.setTokenized(false);
            FIELD_TYPE.setStored(false);
            FIELD_TYPE.setOmitNorms(true);
            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY);
            FIELD_TYPE.freeze();
            FIELD_TYPE_PRE_1_3_0 = new FieldType(FIELD_TYPE);
            FIELD_TYPE_PRE_1_3_0.setIndexed(false);
            FIELD_TYPE_PRE_1_3_0.freeze();
        }
    }

    public static class Builder extends AbstractFieldMapper.Builder<Builder, FieldNamesFieldMapper> {

        /** Whether {@link #index(boolean)} has been called explicitly on this builder. */
        private boolean indexIsExplicit;

        public Builder() {
            super(Defaults.NAME, new FieldType(Defaults.FIELD_TYPE));
            indexName = Defaults.INDEX_NAME;
        }

        @Override
        public Builder index(boolean index) {
            indexIsExplicit = true;
            return super.index(index);
        }

        @Override
        public FieldNamesFieldMapper build(BuilderContext context) {
            // Unless the mapping explicitly asked for it, do not index field names when the index creation
            // version is unknown or older than 1.3.0.
            if ((context.indexCreatedVersion() == null || context.indexCreatedVersion().before(Version.V_1_3_0)) && !indexIsExplicit) {
                fieldType.setIndexed(false);
            }
            return new FieldNamesFieldMapper(name, indexName, boost, fieldType, postingsProvider, docValuesProvider, fieldDataSettings, context.indexSettings());
        }
    }

    public static class TypeParser implements Mapper.TypeParser {
        @Override
        public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
            FieldNamesFieldMapper.Builder builder = fieldNames();
            parseField(builder, builder.name, node, parserContext);
            return builder;
        }
    }

    private final FieldType defaultFieldType;

    /**
     * Pick the default field type for the given index settings: the indexed variant when settings are
     * available and the index was created on or after 1.3.0, the non-indexed variant otherwise.
     */
    private static FieldType defaultFieldType(Settings indexSettings) {
        return indexSettings != null && Version.indexCreated(indexSettings).onOrAfter(Version.V_1_3_0) ? Defaults.FIELD_TYPE : Defaults.FIELD_TYPE_PRE_1_3_0;
    }

    public FieldNamesFieldMapper(Settings indexSettings) {
        this(Defaults.NAME, Defaults.INDEX_NAME, indexSettings);
    }

    protected FieldNamesFieldMapper(String name, String indexName, Settings indexSettings) {
        this(name, indexName, Defaults.BOOST, new FieldType(defaultFieldType(indexSettings)), null, null, null, indexSettings);
    }

    public FieldNamesFieldMapper(String name, String indexName, float boost, FieldType fieldType, PostingsFormatProvider postingsProvider,
                                 DocValuesFormatProvider docValuesProvider, @Nullable Settings fieldDataSettings, Settings indexSettings) {
        super(new Names(name, indexName, indexName, name), boost, fieldType, null, Lucene.KEYWORD_ANALYZER,
                Lucene.KEYWORD_ANALYZER, postingsProvider, docValuesProvider, null, null, fieldDataSettings, indexSettings);
        this.defaultFieldType = defaultFieldType(indexSettings);
    }

    @Override
    public FieldType defaultFieldType() {
        return defaultFieldType;
    }

    @Override
    public FieldDataType defaultFieldDataType() {
        return new FieldDataType("string");
    }

    @Override
    public String value(Object value) {
        if (value == null) {
            return null;
        }
        return value.toString();
    }

    @Override
    public boolean useTermQueryWithQueryString() {
        return true;
    }

    @Override
    public void preParse(ParseContext context) throws IOException {
    }

    @Override
    public void postParse(ParseContext context) throws IOException {
        // The actual work happens here (and not in parse) so that the fields added by the rest of the
        // document are already present when field names are collected.
        super.parse(context);
    }

    @Override
    public void parse(ParseContext context) throws IOException {
        // we parse in post parse
    }

    @Override
    public boolean includeInObject() {
        return false;
    }

    /**
     * Enumerate the given path and all of its dot-separated prefixes, shortest first.
     * For instance <code>a.b.c</code> yields <code>a</code>, <code>a.b</code> and <code>a.b.c</code>.
     * A path without any dot (including the empty string) yields only itself.
     *
     * @param fullPath the full, dot-separated path of a field
     * @return an iterable over the prefixes of <code>fullPath</code>, ending with <code>fullPath</code> itself
     */
    static Iterable<String> extractFieldNames(final String fullPath) {
        return new Iterable<String>() {
            @Override
            public Iterator<String> iterator() {
                return new UnmodifiableIterator<String>() {

                    // exclusive end offset of the next prefix to return; past the path length once exhausted
                    int endIndex = nextEndIndex(0);

                    /** Return the offset of the first '.' at or after <code>index</code>, or the path length if there is none. */
                    private int nextEndIndex(int index) {
                        while (index < fullPath.length() && fullPath.charAt(index) != '.') {
                            index += 1;
                        }
                        return index;
                    }

                    @Override
                    public boolean hasNext() {
                        return endIndex <= fullPath.length();
                    }

                    @Override
                    public String next() {
                        // Honour the Iterator contract instead of failing inside substring() when exhausted.
                        if (!hasNext()) {
                            throw new java.util.NoSuchElementException();
                        }
                        final String result = fullPath.substring(0, endIndex);
                        // skip the '.' that terminated the current prefix
                        endIndex = nextEndIndex(endIndex + 1);
                        return result;
                    }

                };
            }
        };
    }

    /**
     * Add the names of all fields of every document of the context to that same document, under this
     * mapper's index name. Values are added directly to the documents; the <code>fields</code> list is not used.
     */
    @Override
    protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
        if (!fieldType.indexed() && !fieldType.stored() && !hasDocValues()) {
            return;
        }
        for (ParseContext.Document document : context.docs()) {
            // Snapshot the names first: fields are added to the document while iterating below.
            final List<String> paths = new ArrayList<>();
            for (IndexableField field : document.getFields()) {
                paths.add(field.name());
            }
            for (String path : paths) {
                for (String fieldName : extractFieldNames(path)) {
                    if (fieldType.indexed() || fieldType.stored()) {
                        document.add(new XStringField(names().indexName(), fieldName, fieldType));
                    }
                    if (hasDocValues()) {
                        document.add(new SortedSetDocValuesField(names().indexName(), new BytesRef(fieldName)));
                    }
                }
            }
        }
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }

    /**
     * Serialize this mapper, emitting nothing when the mapping is the default one.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        // Render into a scratch builder to detect the default mapping. The parent implementation is
        // expected to write a named object, which is only valid JSON inside an enclosing object, so
        // wrap it in one and compare against the complete, well-formed document.
        XContentBuilder json = XContentFactory.jsonBuilder();
        json.startObject();
        super.toXContent(json, params);
        json.endObject();
        if (json.string().equals("{\"" + NAME + "\":{\"type\":\"" + CONTENT_TYPE + "\"}}")) {
            return builder;
        }
        return super.toXContent(builder, params);
    }
}
Expand Up @@ -27,7 +27,9 @@
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.cache.filter.support.CacheKeyFilter;
import org.elasticsearch.index.mapper.FieldMappers;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.internal.FieldNamesFieldMapper;

import java.io.IOException;
import java.util.Set;
Expand Down Expand Up @@ -81,6 +83,8 @@ public Filter parse(QueryParseContext parseContext) throws IOException, QueryPar
}

public static Filter newFilter(QueryParseContext parseContext, String fieldPattern, String filterName) {
final FieldMappers fieldNamesMapper = parseContext.mapperService().indexName(FieldNamesFieldMapper.CONTENT_TYPE);

MapperService.SmartNameObjectMapper smartNameObjectMapper = parseContext.smartObjectMapper(fieldPattern);
if (smartNameObjectMapper != null && smartNameObjectMapper.hasMapper()) {
// automatic make the object mapper pattern
Expand All @@ -101,7 +105,17 @@ public static Filter newFilter(QueryParseContext parseContext, String fieldPatte
nonNullFieldMappers = smartNameFieldMappers;
}
Filter filter = null;
if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
if (fieldNamesMapper!= null && fieldNamesMapper.mapper().fieldType().indexed()) {
final String f;
if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
f = smartNameFieldMappers.mapper().names().indexName();
} else {
f = field;
}
filter = fieldNamesMapper.mapper().termFilter(f, parseContext);
}
// if _field_names are not indexed, we need to go the slow way
if (filter == null && smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
filter = smartNameFieldMappers.mapper().rangeFilter(null, null, true, true, parseContext);
}
if (filter == null) {
Expand Down

0 comments on commit 703dbff

Please sign in to comment.