Skip to content

Commit

Permalink
Make field data changes immediately taken into account and add the ab…
Browse files Browse the repository at this point in the history
…ility to disallow field data loading.

This commit changes field data configuration updates so that they are
immediately taken into account for loading new segments. The way it works
is that field data configuration is now cached separately from the field
data cache, meaning that it is now possible to clear the field data
configuration from IndexFieldDataService while the cache stays around. The
next time Elasticsearch reloads the field data configuration, it will
check if there is already a cache entry, and reuse it if it exists.

To disable field data loading, all that is required is to change the field
data format to "disabled" (supported by all field data types) using the update
mapping API. Elasticsearch will then refuse to load field data on any new
segment, but field data which has been loaded on the previous segments will
remain available. So you need to clear the field data cache in order to
reclaim memory (otherwise memory will be reclaimed more slowly, as segments
get merged).

Close elastic#4430
Close elastic#4431
  • Loading branch information
jpountz authored and brusic committed Jan 19, 2014
1 parent 09e6f11 commit cc0855a
Show file tree
Hide file tree
Showing 8 changed files with 332 additions and 41 deletions.
24 changes: 24 additions & 0 deletions docs/reference/index-modules/fielddata.asciidoc
Expand Up @@ -26,13 +26,18 @@ example, can be set to `5m` for a 5 minute expiry.

=== Field data formats

The field data format controls how field data should be stored.

Depending on the field type, there might be several field data types
available. In particular, string and numeric types support the `doc_values`
format which allows for computing the field data data-structures at indexing
time and storing them on disk. Although it will make the index larger and may
be slightly slower, this implementation will be more near-realtime-friendly
and will require much less memory from the JVM than other implementations.

Here is an example of how to configure the `tag` field to use the `fst` field
data format.

[source,js]
--------------------------------------------------
{
Expand All @@ -45,6 +50,25 @@ and will require much less memory from the JVM than other implementations.
}
--------------------------------------------------

It is possible to change the field data format (and the field data settings
in general) on a live index by using the update mapping API. When doing so,
field data which had already been loaded for existing segments will remain
alive while new segments will use the new field data configuration. Thanks to
the background merging process, all segments will eventually use the new
field data format.

[float]
==== Disallowing field data loading

Field data can take a lot of RAM so it makes sense to disable field data
loading on the fields that don't need field data, for example those that are
used for full-text search only. In order to disable field data loading, just
change the field data format to `disabled`. Requests that try to load field
data on any field which is configured with this format will then return an
error.

The `disabled` format is supported by all field types.

[float]
==== String field data types

Expand Down
Expand Up @@ -20,6 +20,7 @@
package org.elasticsearch.index.fielddata;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.index.IndexReader;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.collect.MapBuilder;
Expand All @@ -36,12 +37,19 @@
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache;

import java.util.Map;
import java.util.concurrent.ConcurrentMap;

/**
*/
public class IndexFieldDataService extends AbstractIndexComponent {

private static final String DISABLED_FORMAT = "disabled";
private static final String DOC_VALUES_FORMAT = "doc_values";
private static final String ARRAY_FORMAT = "array";
private static final String PAGED_BYTES_FORMAT = "paged_bytes";
private static final String FST_FORMAT = "fst";

private final static ImmutableMap<String, IndexFieldData.Builder> buildersByType;
private final static ImmutableMap<String, IndexFieldData.Builder> docValuesBuildersByType;
private final static ImmutableMap<Tuple<String, String>, IndexFieldData.Builder> buildersByTypeAndFormat;
Expand Down Expand Up @@ -69,30 +77,47 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.immutableMap();

buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
.put(Tuple.tuple("string", "paged_bytes"), new PagedBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", "fst"), new FSTBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", "doc_values"), new DocValuesIndexFieldData.Builder())
.put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
.put(Tuple.tuple("float", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.FLOAT))
.put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("double", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.DOUBLE))
.put(Tuple.tuple("byte", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
.put(Tuple.tuple("byte", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.BYTE))
.put(Tuple.tuple("short", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
.put(Tuple.tuple("short", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.SHORT))
.put(Tuple.tuple("int", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
.put(Tuple.tuple("int", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.INT))
.put(Tuple.tuple("long", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
.put(Tuple.tuple("long", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.LONG))
.put(Tuple.tuple("geo_point", "array"), new GeoPointDoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("string", PAGED_BYTES_FORMAT), new PagedBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", FST_FORMAT), new FSTBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder())
.put(Tuple.tuple("string", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("float", ARRAY_FORMAT), new FloatArrayIndexFieldData.Builder())
.put(Tuple.tuple("float", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.FLOAT))
.put(Tuple.tuple("float", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("double", ARRAY_FORMAT), new DoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("double", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.DOUBLE))
.put(Tuple.tuple("double", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("byte", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
.put(Tuple.tuple("byte", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.BYTE))
.put(Tuple.tuple("byte", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("short", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
.put(Tuple.tuple("short", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.SHORT))
.put(Tuple.tuple("short", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("int", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
.put(Tuple.tuple("int", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.INT))
.put(Tuple.tuple("int", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("long", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
.put(Tuple.tuple("long", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.LONG))
.put(Tuple.tuple("long", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("geo_point", ARRAY_FORMAT), new GeoPointDoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
.immutableMap();
}

private final IndicesFieldDataCache indicesFieldDataCache;
private final ConcurrentMap<String, IndexFieldData<?>> loadedFieldData = ConcurrentCollections.newConcurrentMap();
private final Map<String, IndexFieldDataCache> fieldDataCaches = Maps.newHashMap(); // no need for concurrency support, always used under lock

IndexService indexService;

// public for testing
public IndexFieldDataService(Index index) {
this(index, ImmutableSettings.Builder.EMPTY_SETTINGS, new IndicesFieldDataCache(ImmutableSettings.Builder.EMPTY_SETTINGS));
}
Expand All @@ -114,6 +139,10 @@ public void clear() {
fieldData.clear();
}
loadedFieldData.clear();
for (IndexFieldDataCache cache : fieldDataCaches.values()) {
cache.clear();
}
fieldDataCaches.clear();
}
}

Expand All @@ -123,12 +152,29 @@ public void clearField(String fieldName) {
if (fieldData != null) {
fieldData.clear();
}
IndexFieldDataCache cache = fieldDataCaches.remove(fieldName);
if (cache != null) {
cache.clear();
}
}
}

public void clear(IndexReader reader) {
for (IndexFieldData<?> indexFieldData : loadedFieldData.values()) {
indexFieldData.clear(reader);
synchronized (loadedFieldData) {
for (IndexFieldData<?> indexFieldData : loadedFieldData.values()) {
indexFieldData.clear(reader);
}
for (IndexFieldDataCache cache : fieldDataCaches.values()) {
cache.clear(reader);
}
}
}

/**
 * Forgets every cached field data instance so that the next lookup for a field
 * rebuilds it from the current mapping configuration. Invoked by the mapper
 * service after a mapping merge.
 */
public void onMappingUpdate() {
// synchronize to make sure to not miss field data instances that are being loaded
synchronized (loadedFieldData) {
// important: do not clear fieldDataCaches: the cache may be reused
// (only the field data instances are dropped; already-loaded cache entries stay available)
loadedFieldData.clear();
}
}

Expand Down Expand Up @@ -163,18 +209,21 @@ public <IFD extends IndexFieldData<?>> IFD getForField(FieldMapper<?> mapper) {
throw new ElasticSearchIllegalArgumentException("failed to find field data builder for field " + fieldNames.fullName() + ", and type " + type.getType());
}

IndexFieldDataCache cache;
// we default to node level cache, which in turn defaults to be unbounded
// this means changing the node level settings is simple, just set the bounds there
String cacheType = type.getSettings().get("cache", indexSettings.get("index.fielddata.cache", "node"));
if ("resident".equals(cacheType)) {
cache = new IndexFieldDataCache.Resident(indexService, fieldNames, type);
} else if ("soft".equals(cacheType)) {
cache = new IndexFieldDataCache.Soft(indexService, fieldNames, type);
} else if ("node".equals(cacheType)) {
cache = indicesFieldDataCache.buildIndexFieldDataCache(indexService, index, fieldNames, type);
} else {
throw new ElasticSearchIllegalArgumentException("cache type not supported [" + cacheType + "] for field [" + fieldNames.fullName() + "]");
IndexFieldDataCache cache = fieldDataCaches.get(fieldNames.indexName());
if (cache == null) {
// we default to node level cache, which in turn defaults to be unbounded
// this means changing the node level settings is simple, just set the bounds there
String cacheType = type.getSettings().get("cache", indexSettings.get("index.fielddata.cache", "node"));
if ("resident".equals(cacheType)) {
cache = new IndexFieldDataCache.Resident(indexService, fieldNames, type);
} else if ("soft".equals(cacheType)) {
cache = new IndexFieldDataCache.Soft(indexService, fieldNames, type);
} else if ("node".equals(cacheType)) {
cache = indicesFieldDataCache.buildIndexFieldDataCache(indexService, index, fieldNames, type);
} else {
throw new ElasticSearchIllegalArgumentException("cache type not supported [" + cacheType + "] for field [" + fieldNames.fullName() + "]");
}
fieldDataCaches.put(fieldNames.indexName(), cache);
}

fieldData = builder.build(index, indexSettings, fieldNames, type, cache);
Expand All @@ -184,4 +233,5 @@ public <IFD extends IndexFieldData<?>> IFD getForField(FieldMapper<?> mapper) {
}
return (IFD) fieldData;
}

}
@@ -0,0 +1,68 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReaderContext;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.FieldMapper.Names;
import org.elasticsearch.index.settings.IndexSettings;

/**
* A field data implementation that forbids loading and will throw an {@link ElasticSearchIllegalStateException} if you try to load
* {@link AtomicFieldData} instances.
*/
/**
 * {@link IndexFieldData} implementation that never loads anything: any attempt to load
 * {@link AtomicFieldData} or to obtain a comparator source is rejected with an
 * {@link ElasticSearchIllegalStateException}.
 */
public final class DisabledIndexFieldData extends AbstractIndexFieldData<AtomicFieldData<?>> {

    public DisabledIndexFieldData(Index index, Settings indexSettings, Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
        super(index, indexSettings, fieldNames, fieldDataType, cache);
    }

    /**
     * Always throws: sorting needs field data, which this implementation refuses to load.
     */
    @Override
    public IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, SortMode sortMode) {
        throw loadingForbidden();
    }

    /**
     * Always throws instead of loading field data for the given segment.
     */
    @Override
    public AtomicFieldData<?> loadDirect(AtomicReaderContext context) throws Exception {
        throw loadingForbidden();
    }

    @Override
    public boolean valuesOrdered() {
        return false;
    }

    /**
     * Builds (but does not throw) the exception reported for every rejected operation.
     */
    private ElasticSearchIllegalStateException loadingForbidden() {
        final String fieldName = getFieldNames().name();
        return new ElasticSearchIllegalStateException("Field data loading is forbidden on " + fieldName);
    }

    /**
     * Builder that creates {@link DisabledIndexFieldData} instances.
     */
    public static class Builder implements IndexFieldData.Builder {
        @Override
        public IndexFieldData<AtomicFieldData<?>> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
            return new DisabledIndexFieldData(index, indexSettings, fieldNames, type, cache);
        }
    }

}
Expand Up @@ -48,6 +48,7 @@
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatService;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatService;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
import org.elasticsearch.index.mapper.object.ObjectMapper;
import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
Expand Down Expand Up @@ -76,6 +77,7 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
public static final String DEFAULT_MAPPING = "_default_";

private final AnalysisService analysisService;
private final IndexFieldDataService fieldDataService;

/**
* Will create types automatically if they do not exists in the mapping definition yet
Expand Down Expand Up @@ -105,10 +107,11 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
private final List<DocumentTypeListener> typeListeners = new CopyOnWriteArrayList<DocumentTypeListener>();

@Inject
public MapperService(Index index, @IndexSettings Settings indexSettings, Environment environment, AnalysisService analysisService,
public MapperService(Index index, @IndexSettings Settings indexSettings, Environment environment, AnalysisService analysisService, IndexFieldDataService fieldDataService,
PostingsFormatService postingsFormatService, DocValuesFormatService docValuesFormatService, SimilarityLookupService similarityLookupService) {
super(index, indexSettings);
this.analysisService = analysisService;
this.fieldDataService = fieldDataService;
this.documentParser = new DocumentMapperParser(index, indexSettings, analysisService, postingsFormatService, docValuesFormatService, similarityLookupService);
this.searchAnalyzer = new SmartIndexNameSearchAnalyzer(analysisService.defaultSearchAnalyzer());
this.searchQuoteAnalyzer = new SmartIndexNameSearchQuoteAnalyzer(analysisService.defaultSearchQuoteAnalyzer());
Expand Down Expand Up @@ -278,6 +281,7 @@ private DocumentMapper merge(DocumentMapper mapper) {
logger.debug("merging mapping for type [{}] resulted in conflicts: [{}]", mapper.type(), Arrays.toString(result.conflicts()));
}
}
fieldDataService.onMappingUpdate();
return oldMapper;
} else {
FieldMapperListener.Aggregator fieldMappersAgg = new FieldMapperListener.Aggregator();
Expand Down

0 comments on commit cc0855a

Please sign in to comment.