elastic · jpountz · Dec 12, 2013 · Dec 12, 2013 · Dec 13, 2013 · Dec 15, 2013
diff --git a/docs/reference/index-modules/fielddata.asciidoc b/docs/reference/index-modules/fielddata.asciidoc
@@ -26,13 +26,18 @@ example, can be set to `5m` for a 5 minute expiry.
 
 === Field data formats
 
+The field data format controls how field data should be stored.
+
 Depending on the field type, there might be several field data types
 available. In particular, string and numeric types support the `doc_values`
 format which allows for computing the field data data-structures at indexing
 time and storing them on disk. Although it will make the index larger and may
 be slightly slower, this implementation will be more near-realtime-friendly
 and will require much less memory from the JVM than other implementations.
 
+Here is an example of how to configure the `tag` field to use the `fst` field
+data format.
+
 [source,js]
 --------------------------------------------------
 {
@@ -45,6 +50,26 @@ and will require much less memory from the JVM than other implementations.
 }
 --------------------------------------------------
 
+It is possible to change the field data format (and the field data settings
+in general) on a live index by using the update mapping API. When doing so,
+field data that has already been loaded for existing segments stays in use,
+while new segments use the new field data configuration. Because the
+background merging process replaces old segments with new ones, all segments
+will eventually use the new field data format.
+
+[float]
+==== Disallowing field data loading
+
+Loading field data into memory can easily take gigabytes of RAM. If your
+cluster's memory usage is close to its maximum capacity, loading field data
+of a high-cardinality string field by mistake is likely to take your whole
+cluster down. To prevent this, Elasticsearch provides the special `disabled`
+field data format, which prevents field data from being loaded into memory
+and causes every request that would need field data for this field to return
+an error.
+
+The `disabled` format is supported by all field types.
+
 [float]
 ==== String field data types
 

diff --git a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java
@@ -20,6 +20,7 @@
 package org.elasticsearch.index.fielddata;
 
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
 import org.apache.lucene.index.IndexReader;
 import org.elasticsearch.ElasticSearchIllegalArgumentException;
 import org.elasticsearch.common.collect.MapBuilder;
@@ -36,12 +37,19 @@
 import org.elasticsearch.index.settings.IndexSettings;
 import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache;
 
+import java.util.Map;
 import java.util.concurrent.ConcurrentMap;
 
 /**
  */
 public class IndexFieldDataService extends AbstractIndexComponent {
 
+    private static final String DISABLED_FORMAT = "disabled";
+    private static final String DOC_VALUES_FORMAT = "doc_values";
+    private static final String ARRAY_FORMAT = "array";
+    private static final String PAGED_BYTES_FORMAT = "paged_bytes";
+    private static final String FST_FORMAT = "fst";
+
     private final static ImmutableMap<String, IndexFieldData.Builder> buildersByType;
     private final static ImmutableMap<String, IndexFieldData.Builder> docValuesBuildersByType;
     private final static ImmutableMap<Tuple<String, String>, IndexFieldData.Builder> buildersByTypeAndFormat;
@@ -69,30 +77,47 @@ public class IndexFieldDataService extends AbstractIndexComponent {
                 .immutableMap();
 
         buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
-                .put(Tuple.tuple("string", "paged_bytes"), new PagedBytesIndexFieldData.Builder())
-                .put(Tuple.tuple("string", "fst"), new FSTBytesIndexFieldData.Builder())
-                .put(Tuple.tuple("string", "doc_values"), new DocValuesIndexFieldData.Builder())
-                .put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
-                .put(Tuple.tuple("float", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.FLOAT))
-                .put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
-                .put(Tuple.tuple("double", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.DOUBLE))
-                .put(Tuple.tuple("byte", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
-                .put(Tuple.tuple("byte", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.BYTE))
-                .put(Tuple.tuple("short", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
-                .put(Tuple.tuple("short", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.SHORT))
-                .put(Tuple.tuple("int", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
-                .put(Tuple.tuple("int", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.INT))
-                .put(Tuple.tuple("long", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
-                .put(Tuple.tuple("long", "doc_values"), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.LONG))
-                .put(Tuple.tuple("geo_point", "array"), new GeoPointDoubleArrayIndexFieldData.Builder())
+                .put(Tuple.tuple("string", PAGED_BYTES_FORMAT), new PagedBytesIndexFieldData.Builder())
+                .put(Tuple.tuple("string", FST_FORMAT), new FSTBytesIndexFieldData.Builder())
+                .put(Tuple.tuple("string", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder())
+                .put(Tuple.tuple("string", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
+
+                .put(Tuple.tuple("float", ARRAY_FORMAT), new FloatArrayIndexFieldData.Builder())
+                .put(Tuple.tuple("float", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.FLOAT))
+                .put(Tuple.tuple("float", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
+
+                .put(Tuple.tuple("double", ARRAY_FORMAT), new DoubleArrayIndexFieldData.Builder())
+                .put(Tuple.tuple("double", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.DOUBLE))
+                .put(Tuple.tuple("double", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
+
+                .put(Tuple.tuple("byte", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
+                .put(Tuple.tuple("byte", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.BYTE))
+                .put(Tuple.tuple("byte", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
+
+                .put(Tuple.tuple("short", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
+                .put(Tuple.tuple("short", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.SHORT))
+                .put(Tuple.tuple("short", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
+
+                .put(Tuple.tuple("int", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
+                .put(Tuple.tuple("int", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.INT))
+                .put(Tuple.tuple("int", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
+
+                .put(Tuple.tuple("long", ARRAY_FORMAT), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
+                .put(Tuple.tuple("long", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.LONG))
+                .put(Tuple.tuple("long", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
+
+                .put(Tuple.tuple("geo_point", ARRAY_FORMAT), new GeoPointDoubleArrayIndexFieldData.Builder())
+                .put(Tuple.tuple("geo_point", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
                 .immutableMap();
     }
 
     private final IndicesFieldDataCache indicesFieldDataCache;
     private final ConcurrentMap<String, IndexFieldData<?>> loadedFieldData = ConcurrentCollections.newConcurrentMap();
+    private final Map<String, IndexFieldDataCache> fieldDataCaches = Maps.newHashMap(); // no need for concurrency support, always used under lock
 
     IndexService indexService;
 
+    // public for testing
     public IndexFieldDataService(Index index) {
         this(index, ImmutableSettings.Builder.EMPTY_SETTINGS, new IndicesFieldDataCache(ImmutableSettings.Builder.EMPTY_SETTINGS));
     }
@@ -114,6 +139,10 @@ public void clear() {
                 fieldData.clear();
             }
             loadedFieldData.clear();
+            for (IndexFieldDataCache cache : fieldDataCaches.values()) {
+                cache.clear();
+            }
+            fieldDataCaches.clear();
         }
     }
 
@@ -123,12 +152,29 @@ public void clearField(String fieldName) {
             if (fieldData != null) {
                 fieldData.clear();
             }
+            IndexFieldDataCache cache = fieldDataCaches.remove(fieldName);
+            if (cache != null) {
+                cache.clear();
+            }
         }
     }
 
     public void clear(IndexReader reader) {
-        for (IndexFieldData<?> indexFieldData : loadedFieldData.values()) {
-            indexFieldData.clear(reader);
+        synchronized (loadedFieldData) { // fieldDataCaches is a plain HashMap, so it is only iterated while holding this lock
+            for (IndexFieldData<?> indexFieldData : loadedFieldData.values()) {
+                indexFieldData.clear(reader);
+            }
+            for (IndexFieldDataCache cache : fieldDataCaches.values()) { // caches are kept across onMappingUpdate(), so clear them separately
+                cache.clear(reader);
+            }
+        }
+    }
+
+    public void onMappingUpdate() {
+        // forgets the loaded field data instances (MapperService calls this after merging a mapping); synchronized so that instances being loaded are not missed
+        synchronized (loadedFieldData) {
+            // important: do not clear fieldDataCaches: getForField looks the per-field cache up there and reuses it
+            loadedFieldData.clear();
         }
     }
 
@@ -163,18 +209,21 @@ public <IFD extends IndexFieldData<?>> IFD getForField(FieldMapper<?> mapper) {
                         throw new ElasticSearchIllegalArgumentException("failed to find field data builder for field " + fieldNames.fullName() + ", and type " + type.getType());
                     }
 
-                    IndexFieldDataCache cache;
-                    //  we default to node level cache, which in turn defaults to be unbounded
-                    // this means changing the node level settings is simple, just set the bounds there
-                    String cacheType = type.getSettings().get("cache", indexSettings.get("index.fielddata.cache", "node"));
-                    if ("resident".equals(cacheType)) {
-                        cache = new IndexFieldDataCache.Resident(indexService, fieldNames, type);
-                    } else if ("soft".equals(cacheType)) {
-                        cache = new IndexFieldDataCache.Soft(indexService, fieldNames, type);
-                    } else if ("node".equals(cacheType)) {
-                        cache = indicesFieldDataCache.buildIndexFieldDataCache(indexService, index, fieldNames, type);
-                    } else {
-                        throw new ElasticSearchIllegalArgumentException("cache type not supported [" + cacheType + "] for field [" + fieldNames.fullName() + "]");
+                    IndexFieldDataCache cache = fieldDataCaches.get(fieldNames.indexName());
+                    if (cache == null) {
+                        //  we default to node level cache, which in turn defaults to be unbounded
+                        // this means changing the node level settings is simple, just set the bounds there
+                        String cacheType = type.getSettings().get("cache", indexSettings.get("index.fielddata.cache", "node"));
+                        if ("resident".equals(cacheType)) {
+                            cache = new IndexFieldDataCache.Resident(indexService, fieldNames, type);
+                        } else if ("soft".equals(cacheType)) {
+                            cache = new IndexFieldDataCache.Soft(indexService, fieldNames, type);
+                        } else if ("node".equals(cacheType)) {
+                            cache = indicesFieldDataCache.buildIndexFieldDataCache(indexService, index, fieldNames, type);
+                        } else {
+                            throw new ElasticSearchIllegalArgumentException("cache type not supported [" + cacheType + "] for field [" + fieldNames.fullName() + "]");
+                        }
+                        fieldDataCaches.put(fieldNames.indexName(), cache);
                     }
 
                     fieldData = builder.build(index, indexSettings, fieldNames, type, cache);
@@ -184,4 +233,5 @@ public <IFD extends IndexFieldData<?>> IFD getForField(FieldMapper<?> mapper) {
         }
         return (IFD) fieldData;
     }
+
 }
diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/DisabledIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/DisabledIndexFieldData.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.fielddata.plain;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.elasticsearch.ElasticSearchIllegalStateException;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.fielddata.*;
+import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.FieldMapper.Names;
+import org.elasticsearch.index.settings.IndexSettings;
+
+/**
+ * A field data implementation that forbids loading: {@link #loadDirect} and {@link #comparatorSource} always throw an
+ * {@link ElasticSearchIllegalStateException} instead of producing {@link AtomicFieldData} or a sort comparator.
+ */
+public final class DisabledIndexFieldData extends AbstractIndexFieldData<AtomicFieldData<?>> {
+
+    public static class Builder implements IndexFieldData.Builder {
+        @Override
+        public IndexFieldData<AtomicFieldData<?>> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
+            return new DisabledIndexFieldData(index, indexSettings, fieldNames, type, cache);
+        }
+    }
+
+    public DisabledIndexFieldData(Index index, Settings indexSettings, Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
+        super(index, indexSettings, fieldNames, fieldDataType, cache);
+    }
+
+    @Override
+    public boolean valuesOrdered() {
+        return false;
+    }
+
+    @Override
+    public AtomicFieldData<?> loadDirect(AtomicReaderContext context) throws Exception {
+        throw fail();
+    }
+
+    @Override
+    public IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, SortMode sortMode) {
+        throw fail();
+    }
+
+    private ElasticSearchIllegalStateException fail() { // returns the exception rather than throwing it, so call sites can write "throw fail()"
+        return new ElasticSearchIllegalStateException("Field data loading is forbidden on " + getFieldNames().name());
+    }
+
+}
diff --git a/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/src/main/java/org/elasticsearch/index/mapper/MapperService.java
@@ -48,6 +48,7 @@
 import org.elasticsearch.index.analysis.AnalysisService;
 import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatService;
 import org.elasticsearch.index.codec.postingsformat.PostingsFormatService;
+import org.elasticsearch.index.fielddata.IndexFieldDataService;
 import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
 import org.elasticsearch.index.mapper.object.ObjectMapper;
 import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
@@ -76,6 +77,7 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
     public static final String DEFAULT_MAPPING = "_default_";
 
     private final AnalysisService analysisService;
+    private final IndexFieldDataService fieldDataService;
 
     /**
      * Will create types automatically if they do not exists in the mapping definition yet
@@ -105,10 +107,11 @@ public class MapperService extends AbstractIndexComponent implements Iterable<Do
     private final List<DocumentTypeListener> typeListeners = new CopyOnWriteArrayList<DocumentTypeListener>();
 
     @Inject
-    public MapperService(Index index, @IndexSettings Settings indexSettings, Environment environment, AnalysisService analysisService,
+    public MapperService(Index index, @IndexSettings Settings indexSettings, Environment environment, AnalysisService analysisService, IndexFieldDataService fieldDataService,
                          PostingsFormatService postingsFormatService, DocValuesFormatService docValuesFormatService, SimilarityLookupService similarityLookupService) {
         super(index, indexSettings);
         this.analysisService = analysisService;
+        this.fieldDataService = fieldDataService;
         this.documentParser = new DocumentMapperParser(index, indexSettings, analysisService, postingsFormatService, docValuesFormatService, similarityLookupService);
         this.searchAnalyzer = new SmartIndexNameSearchAnalyzer(analysisService.defaultSearchAnalyzer());
         this.searchQuoteAnalyzer = new SmartIndexNameSearchQuoteAnalyzer(analysisService.defaultSearchQuoteAnalyzer());
@@ -278,6 +281,7 @@ private DocumentMapper merge(DocumentMapper mapper) {
                         logger.debug("merging mapping for type [{}] resulted in conflicts: [{}]", mapper.type(), Arrays.toString(result.conflicts()));
                     }
                 }
+                fieldDataService.onMappingUpdate();
                 return oldMapper;
             } else {
                 FieldMapperListener.Aggregator fieldMappersAgg = new FieldMapperListener.Aggregator();