Skip to content

Commit

Permalink
Make field data changes immediately taken into account and add the ab…
Browse files Browse the repository at this point in the history
…ility to disallow field data loading.

This commit changes field data configuration updates so that they are
immediately taken into account when loading new segments. Field data
configuration is now cached separately from the field data cache, which
makes it possible to clear the field data configuration from
IndexFieldDataService while the cache stays around. The next time
Elasticsearch reloads the field data configuration, it checks whether a
cache entry already exists and reuses it if so.

To disable field data loading, all that is required is to change the field
data format to "disabled" (supported by all field data types) using the
update mapping API. Elasticsearch will then refuse to load field data on any
new segment, but field data which has already been loaded on previous
segments will remain available. So you need to clear the field data cache in
order to reclaim memory immediately (otherwise memory will be reclaimed more
slowly, as segments get merged).

Close #4430
Close #4431
  • Loading branch information
jpountz committed Dec 16, 2013
1 parent 75b90d4 commit 9b8f750
Show file tree
Hide file tree
Showing 9 changed files with 399 additions and 34 deletions.
64 changes: 64 additions & 0 deletions docs/reference/index-modules/fielddata.asciidoc
Expand Up @@ -24,6 +24,70 @@ field data after a certain time of inactivity. Defaults to `-1`. For
example, can be set to `5m` for a 5 minute expiry.
|=======================================================================

=== Field data formats

The field data format controls how field data should be stored.

Depending on the field type, there might be several field data types
available.

Here is an example of how to configure the `tag` field to use the `fst` field
data format.

[source,js]
--------------------------------------------------
{
tag: {
type: "string",
fielddata: {
format: "fst"
}
}
}
--------------------------------------------------

It is possible to change the field data format (and the field data settings
in general) on a live index by using the update mapping API. When doing so,
field data which had already been loaded for existing segments will remain
alive while new segments will use the new field data configuration. Thanks to
the background merging process, all segments will eventually use the new
field data format.

[float]
==== Disallowing field data loading

Field data can take a lot of RAM, so it makes sense to disable field data
loading on the fields that don't need field data, for example those that are
used for full-text search only. In order to disable field data loading, just
change the field data format to `disabled`. Requests that try to load field
data on any field which is configured with this format will then return an
error.

The `disabled` format is supported by all field types.

[float]
==== String field data types

`paged_bytes` (default)::
Stores unique terms sequentially in a large buffer and maps documents to
the indices of the terms they contain in this large buffer.

`fst`::
Stores terms in a FST. Slower to build than `paged_bytes` but can help lower
memory usage if many terms share common prefixes and/or suffixes.

[float]
==== Numeric field data types

`array` (default)::
Stores field values in memory using arrays.

[float]
==== Geo point field data types

`array` (default)::
Stores latitudes and longitudes in arrays.

[float]
=== Fielddata loading

Expand Down
Expand Up @@ -27,6 +27,7 @@
*/
public class FieldDataType {

public static final String FORMAT_KEY = "format";
private static final String LOADING_KEY = "loading";
private static final String EAGER_LOADING_VALUE = "eager";
private static final String LAZY_LOADING_VALUE = "lazy";
Expand Down
Expand Up @@ -20,6 +20,7 @@
package org.elasticsearch.index.fielddata;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.lucene.index.IndexReader;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.collect.MapBuilder;
Expand All @@ -36,12 +37,18 @@
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache;

import java.util.Map;
import java.util.concurrent.ConcurrentMap;

/**
*/
public class IndexFieldDataService extends AbstractIndexComponent {

private static final String DISABLED_FORMAT = "disabled";
private static final String ARRAY_FORMAT = "array";
private static final String PAGED_BYTES_FORMAT = "paged_bytes";
private static final String FST_FORMAT = "fst";

private final static ImmutableMap<String, IndexFieldData.Builder> buildersByType;
private final static ImmutableMap<Tuple<String, String>, IndexFieldData.Builder> buildersByTypeAndFormat;

Expand All @@ -58,23 +65,40 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.immutableMap();

buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
.put(Tuple.tuple("string", "paged_bytes"), new PagedBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", "fst"), new FSTBytesIndexFieldData.Builder())
.put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
.put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("byte", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
.put(Tuple.tuple("short", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
.put(Tuple.tuple("int", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
.put(Tuple.tuple("long", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
.put(Tuple.tuple("geo_point", "array"), new GeoPointDoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("string", PAGED_BYTES_FORMAT), new PagedBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", FST_FORMAT), new FSTBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("float", ARRAY_FORMAT), new FloatArrayIndexFieldData.Builder())
.put(Tuple.tuple("float", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("double", ARRAY_FORMAT), new DoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("double", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("byte", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
.put(Tuple.tuple("byte", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("short", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
.put(Tuple.tuple("short", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("int", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
.put(Tuple.tuple("int", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("long", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
.put(Tuple.tuple("long", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())

.put(Tuple.tuple("geo_point", ARRAY_FORMAT), new GeoPointDoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
.immutableMap();
}

private final IndicesFieldDataCache indicesFieldDataCache;
private final ConcurrentMap<String, IndexFieldData> loadedFieldData = ConcurrentCollections.newConcurrentMap();
private final ConcurrentMap<String, IndexFieldData<?>> loadedFieldData = ConcurrentCollections.newConcurrentMap();
private final Map<String, IndexFieldDataCache> fieldDataCaches = Maps.newHashMap(); // no need for concurrency support, always used under lock

IndexService indexService;

// public for testing
public IndexFieldDataService(Index index) {
this(index, ImmutableSettings.Builder.EMPTY_SETTINGS, new IndicesFieldDataCache(ImmutableSettings.Builder.EMPTY_SETTINGS));
}
Expand All @@ -96,6 +120,10 @@ public void clear() {
fieldData.clear();
}
loadedFieldData.clear();
for (IndexFieldDataCache cache : fieldDataCaches.values()) {
cache.clear();
}
fieldDataCaches.clear();
}
}

Expand All @@ -105,12 +133,29 @@ public void clearField(String fieldName) {
if (fieldData != null) {
fieldData.clear();
}
IndexFieldDataCache cache = fieldDataCaches.remove(fieldName);
if (cache != null) {
cache.clear();
}
}
}

public void clear(IndexReader reader) {
for (IndexFieldData indexFieldData : loadedFieldData.values()) {
indexFieldData.clear(reader);
synchronized (loadedFieldData) {
for (IndexFieldData<?> indexFieldData : loadedFieldData.values()) {
indexFieldData.clear(reader);
}
for (IndexFieldDataCache cache : fieldDataCaches.values()) {
cache.clear(reader);
}
}
}

/**
 * Forgets every {@link IndexFieldData} instance currently registered in
 * {@code loadedFieldData}, leaving the per-field {@code fieldDataCaches}
 * entries untouched.
 * <p>
 * The clearing happens while holding the monitor of {@code loadedFieldData}.
 * NOTE(review): presumably the field data instances are rebuilt from the
 * current mapping on the next lookup and pick up the retained cache entry --
 * confirm against {@code getForField}.
 */
public void onMappingUpdate() {
// synchronize to make sure to not miss field data instances that are being loaded
synchronized (loadedFieldData) {
// important: do not clear fieldDataCaches: the cache may be reused
// (only the IndexFieldData instances are dropped here)
loadedFieldData.clear();
}
}

Expand Down Expand Up @@ -139,18 +184,21 @@ public <IFD extends IndexFieldData> IFD getForField(FieldMapper.Names fieldNames
throw new ElasticSearchIllegalArgumentException("failed to find field data builder for field " + fieldNames.fullName() + ", and type " + type.getType());
}

IndexFieldDataCache cache;
// we default to node level cache, which in turn defaults to be unbounded
// this means changing the node level settings is simple, just set the bounds there
String cacheType = type.getSettings().get("cache", indexSettings.get("index.fielddata.cache", "node"));
if ("resident".equals(cacheType)) {
cache = new IndexFieldDataCache.Resident(indexService, fieldNames, type);
} else if ("soft".equals(cacheType)) {
cache = new IndexFieldDataCache.Soft(indexService, fieldNames, type);
} else if ("node".equals(cacheType)) {
cache = indicesFieldDataCache.buildIndexFieldDataCache(indexService, index, fieldNames, type);
} else {
throw new ElasticSearchIllegalArgumentException("cache type not supported [" + cacheType + "] for field [" + fieldNames.fullName() + "]");
IndexFieldDataCache cache = fieldDataCaches.get(fieldNames.indexName());
if (cache == null) {
// we default to node level cache, which in turn defaults to be unbounded
// this means changing the node level settings is simple, just set the bounds there
String cacheType = type.getSettings().get("cache", indexSettings.get("index.fielddata.cache", "node"));
if ("resident".equals(cacheType)) {
cache = new IndexFieldDataCache.Resident(indexService, fieldNames, type);
} else if ("soft".equals(cacheType)) {
cache = new IndexFieldDataCache.Soft(indexService, fieldNames, type);
} else if ("node".equals(cacheType)) {
cache = indicesFieldDataCache.buildIndexFieldDataCache(indexService, index, fieldNames, type);
} else {
throw new ElasticSearchIllegalArgumentException("cache type not supported [" + cacheType + "] for field [" + fieldNames.fullName() + "]");
}
fieldDataCaches.put(fieldNames.indexName(), cache);
}

fieldData = builder.build(index, indexSettings, fieldNames, type, cache);
Expand All @@ -160,4 +208,5 @@ public <IFD extends IndexFieldData> IFD getForField(FieldMapper.Names fieldNames
}
return (IFD) fieldData;
}

}
@@ -0,0 +1,68 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReaderContext;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.FieldMapper.Names;
import org.elasticsearch.index.settings.IndexSettings;

/**
 * An {@link IndexFieldData} that never loads anything: asking it for per-segment
 * {@link AtomicFieldData} or for a comparator source raises an
 * {@link ElasticSearchIllegalStateException} naming the offending field.
 */
public final class DisabledIndexFieldData extends AbstractIndexFieldData<AtomicFieldData<?>> {

    public DisabledIndexFieldData(Index index, Settings indexSettings, Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
        super(index, indexSettings, fieldNames, fieldDataType, cache);
    }

    /**
     * Always rejects sorting on this field.
     *
     * @throws ElasticSearchIllegalStateException unconditionally
     */
    @Override
    public IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, SortMode sortMode) {
        throw loadingForbidden();
    }

    /**
     * Always rejects loading field data for the given segment.
     *
     * @throws ElasticSearchIllegalStateException unconditionally
     */
    @Override
    public AtomicFieldData<?> loadDirect(AtomicReaderContext context) throws Exception {
        throw loadingForbidden();
    }

    /** Nothing is ever loaded, so values are reported as unordered. */
    @Override
    public boolean valuesOrdered() {
        return false;
    }

    /** Creates (but does not throw) the exception reported for any load attempt. */
    private ElasticSearchIllegalStateException loadingForbidden() {
        final String message = "Field data loading is forbidden on " + getFieldNames().name();
        return new ElasticSearchIllegalStateException(message);
    }

    /** Builder that creates {@link DisabledIndexFieldData} instances. */
    public static class Builder implements IndexFieldData.Builder {
        @Override
        public IndexFieldData<AtomicFieldData<?>> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames,
                                                        FieldDataType type, IndexFieldDataCache cache) {
            return new DisabledIndexFieldData(index, indexSettings, fieldNames, type, cache);
        }
    }

}
Expand Up @@ -45,6 +45,7 @@
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatService;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
import org.elasticsearch.index.mapper.object.ObjectMapper;
import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
Expand Down Expand Up @@ -74,7 +75,7 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
public static final String DEFAULT_MAPPING = "_default_";

private final AnalysisService analysisService;
private final PostingsFormatService postingsFormatService;
private final IndexFieldDataService fieldDataService;

/**
* Will create types automatically if they do not exists in the mapping definition yet
Expand All @@ -101,11 +102,11 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
private final SmartIndexNameSearchQuoteAnalyzer searchQuoteAnalyzer;

@Inject
public MapperService(Index index, @IndexSettings Settings indexSettings, Environment environment, AnalysisService analysisService,
public MapperService(Index index, @IndexSettings Settings indexSettings, Environment environment, AnalysisService analysisService, IndexFieldDataService fieldDataService,
PostingsFormatService postingsFormatService, SimilarityLookupService similarityLookupService) {
super(index, indexSettings);
this.analysisService = analysisService;
this.postingsFormatService = postingsFormatService;
this.fieldDataService = fieldDataService;
this.documentParser = new DocumentMapperParser(index, indexSettings, analysisService, postingsFormatService, similarityLookupService);
this.searchAnalyzer = new SmartIndexNameSearchAnalyzer(analysisService.defaultSearchAnalyzer());
this.searchQuoteAnalyzer = new SmartIndexNameSearchQuoteAnalyzer(analysisService.defaultSearchQuoteAnalyzer());
Expand Down Expand Up @@ -232,6 +233,7 @@ private DocumentMapper merge(DocumentMapper mapper) {
logger.debug("merging mapping for type [{}] resulted in conflicts: [{}]", mapper.type(), Arrays.toString(result.conflicts()));
}
}
fieldDataService.onMappingUpdate();
return oldMapper;
} else {
FieldMapperListener.Aggregator fieldMappersAgg = new FieldMapperListener.Aggregator();
Expand Down

0 comments on commit 9b8f750

Please sign in to comment.