Skip to content

Commit

Permalink
Use non analyzed token stream optimization everywhere
Browse files Browse the repository at this point in the history
In the string type, we have an optimization to reuse the StringTokenStream on a thread local when a non-analyzed field is used (instead of creating it each time). We should use this across the board in all places where we create a field with a String.
Also, move to a specific XStringField, so that we can reuse the StringTokenStream instead of copying it.
closes #6001
  • Loading branch information
kimchy committed Apr 30, 2014
1 parent 12f43fb commit 23f200b
Show file tree
Hide file tree
Showing 15 changed files with 94 additions and 103 deletions.
3 changes: 3 additions & 0 deletions core-signatures.txt
Expand Up @@ -51,3 +51,6 @@ java.lang.Math#abs(long)

@defaultMessage Use Long.compare instead we are on Java7
com.google.common.primitives.Longs#compare(long,long)

@defaultMessage we have an optimized XStringField to reduce analysis creation overhead
org.apache.lucene.document.Field#<init>(java.lang.String,java.lang.String,org.apache.lucene.document.FieldType)
62 changes: 62 additions & 0 deletions src/main/java/org/apache/lucene/document/XStringField.java
@@ -0,0 +1,62 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.document;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.CloseableThreadLocal;

import java.io.IOException;

/**
 * A string/text field which, for indexed but not-analyzed values, hands out a
 * reusable per-thread token stream instead of building a fresh analysis chain
 * (and its attribute objects) each time. This cuts both analysis overhead and
 * object creation, which is significant given how heavy Attributes are.
 * <p/>
 * Not to be confused with Lucene's StringField: this class also handles
 * analyzed text, with all behavior driven by the supplied {@link FieldType}.
 */
public class XStringField extends Field {

    // One reusable stream per thread; only handed out for indexed,
    // not-tokenized values (see tokenStream below).
    private static final CloseableThreadLocal<StringTokenStream> CACHED_STREAM = new CloseableThreadLocal<StringTokenStream>() {
        @Override
        protected StringTokenStream initialValue() {
            return new StringTokenStream();
        }
    };

    /**
     * Creates a new field.
     *
     * @param name      the field name
     * @param value     the string value to index and/or store
     * @param fieldType controls indexing, tokenization and storage behavior
     */
    public XStringField(String name, String value, FieldType fieldType) {
        super(name, fieldType);
        fieldsData = value;
    }

    @Override
    public TokenStream tokenStream(Analyzer analyzer) throws IOException {
        FieldType type = fieldType();
        if (!type.indexed()) {
            return null;
        }
        if (!type.tokenized()) {
            // indexed + not-tokenized: reuse the cached per-thread stream
            StringTokenStream stream = CACHED_STREAM.get();
            stream.setValue((String) fieldsData);
            return stream;
        }
        // analyzed values go through the regular analysis chain
        return super.tokenStream(analyzer);
    }
}
Expand Up @@ -21,6 +21,7 @@

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.Filter;
Expand Down Expand Up @@ -221,7 +222,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
if (value == null) {
return;
}
fields.add(new Field(names.indexName(), value ? "T" : "F", fieldType));
fields.add(new XStringField(names.indexName(), value ? "T" : "F", fieldType));
}

@Override
Expand Down
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
Expand Down Expand Up @@ -387,18 +388,11 @@ public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload
surfaceForm, weight, payload);
}

private static final class SuggestField extends Field {
private static final class SuggestField extends XStringField {
private final BytesRef payload;
private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
private final ContextMapping.Context ctx;

public SuggestField(String name, ContextMapping.Context ctx, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
super(name, value, type);
this.payload = payload;
this.toFiniteStrings = toFiniteStrings;
this.ctx = ctx;
}

public SuggestField(String name, ContextMapping.Context ctx, String value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
super(name, value, type);
this.payload = payload;
Expand Down
Expand Up @@ -26,6 +26,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
Expand Down Expand Up @@ -286,7 +287,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
}

if (fieldType.indexed() || fieldType.stored()) {
Field field = new StringField(names.indexName(), valueAndBoost.value(), fieldType);
Field field = new XStringField(names.indexName(), valueAndBoost.value(), fieldType);
field.setBoost(valueAndBoost.boost());
fields.add(field);
}
Expand Down Expand Up @@ -385,86 +386,6 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
}
}

/** Extension of {@link Field} that reuses a cached per-thread TokenStream for not-tokenized values. */
static class StringField extends Field {

    /**
     * @param name      field name
     * @param value     string value (stored into {@code fieldsData})
     * @param fieldType indexing/storage options
     */
    public StringField(String name, String value, FieldType fieldType) {
        super(name, fieldType);
        fieldsData = value;
    }

    @Override
    public TokenStream tokenStream(Analyzer analyzer) throws IOException {
        FieldType type = fieldType();
        if (!type.indexed()) {
            return null;
        }
        if (type.tokenized()) {
            // analyzed values go through the normal analysis chain
            return super.tokenStream(analyzer);
        }
        // indexed + not-tokenized: reuse the per-thread cached stream
        StringTokenStream cached = NOT_ANALYZED_TOKENSTREAM.get();
        return cached.setValue((String) fieldsData);
    }
}

// Per-thread reusable token stream for not-analyzed values; avoids rebuilding
// the attribute chain on every field instance. Used only by StringField above.
private static final ThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new ThreadLocal<StringTokenStream>() {
    @Override
    protected StringTokenStream initialValue() {
        return new StringTokenStream();
    }
};


// Copied from Field.java
// Single-token stream: emits the whole string value as one token.
// Expected consumer lifecycle: setValue -> reset -> incrementToken -> end -> close.
static final class StringTokenStream extends TokenStream {
    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
    // guards against emitting the single token more than once per reset
    private boolean used = false;
    private String value = null;

    /**
     * Creates a new TokenStream that returns a String as single token.
     * <p>Warning: Does not initialize the value, you must call
     * {@link #setValue(String)} afterwards!
     */
    StringTokenStream() {
    }

    /** Sets the string value. */
    StringTokenStream setValue(String value) {
        this.value = value;
        return this;
    }

    @Override
    public boolean incrementToken() {
        if (used) {
            // only one token per stream; subsequent calls signal exhaustion
            return false;
        }
        clearAttributes();
        termAttribute.append(value);
        offsetAttribute.setOffset(0, value.length());
        used = true;
        return true;
    }

    @Override
    public void end() {
        // final offset is the full value length (single-token stream)
        // NOTE(review): does not call super.end(); mirrors the upstream
        // Field.java copy — verify against the Lucene version in use.
        final int finalOffset = value.length();
        offsetAttribute.setOffset(finalOffset, finalOffset);
        value = null;
    }

    @Override
    public void reset() {
        // re-arm so the (next) value can be emitted again
        used = false;
    }

    @Override
    public void close() {
        // drop the value reference; instance lives in a thread local
        value = null;
    }
}

/**
* Parsed value and boost to be returned from {@link #parseCreateFieldForString}.
*/
Expand Down
Expand Up @@ -24,6 +24,7 @@
import com.google.common.base.Objects;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
Expand Down Expand Up @@ -570,7 +571,7 @@ private void parse(ParseContext context, GeoPoint point, String geohash) throws
}

if (fieldType.indexed() || fieldType.stored()) {
Field field = new Field(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
Field field = new XStringField(names.indexName(), Double.toString(point.lat()) + ',' + Double.toString(point.lon()), fieldType);
context.doc().add(field);
}
if (enableGeoHash) {
Expand Down
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;
Expand Down Expand Up @@ -313,7 +314,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
} // else we are in the pre/post parse phase

if (fieldType.indexed() || fieldType.stored()) {
fields.add(new Field(names.indexName(), context.id(), fieldType));
fields.add(new XStringField(names.indexName(), context.id(), fieldType));
}
if (hasDocValues()) {
fields.add(new BinaryDocValuesField(names.indexName(), new BytesRef(context.id())));
Expand Down
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
Expand Down Expand Up @@ -185,7 +186,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
if (!enabledState.enabled) {
return;
}
fields.add(new Field(names.indexName(), context.index(), fieldType));
fields.add(new XStringField(names.indexName(), context.index(), fieldType));
}

@Override
Expand Down
Expand Up @@ -20,6 +20,7 @@

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
Expand Down Expand Up @@ -188,7 +189,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
// we are in the parsing of _parent phase
String parentId = context.parser().text();
context.sourceToParse().parent(parentId);
fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
} else {
// otherwise, we are running it post processing of the xcontent
String parsedParentId = context.doc().get(Defaults.NAME);
Expand All @@ -199,7 +200,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
throw new MapperParsingException("No parent id provided, not within the document, and not externally");
}
// we did not add it in the parsing phase, add it now
fields.add(new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
fields.add(new XStringField(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType));
} else if (parentId != null && !parsedParentId.equals(Uid.createUid(context.stringBuilder(), type, parentId))) {
throw new MapperParsingException("Parent id mismatch, document value is [" + Uid.createUid(parsedParentId).id() + "], while external value is [" + parentId + "]");
}
Expand Down
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
Expand Down Expand Up @@ -226,7 +227,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
context.ignoredValue(names.indexName(), routing);
return;
}
fields.add(new Field(names.indexName(), routing, fieldType));
fields.add(new XStringField(names.indexName(), routing, fieldType));
}
}
}
Expand Down
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
Expand Down Expand Up @@ -181,7 +182,7 @@ protected void parseCreateField(ParseContext context, List<Field> fields) throws
if (!fieldType.indexed() && !fieldType.stored()) {
return;
}
fields.add(new Field(names.indexName(), context.type(), fieldType));
fields.add(new XStringField(names.indexName(), context.type(), fieldType));
if (hasDocValues()) {
fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(context.type())));
}
Expand Down
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
Expand Down Expand Up @@ -153,7 +154,7 @@ public void postParse(ParseContext context) throws IOException {
// we need to go over the docs and add it...
for (int i = 1; i < context.docs().size(); i++) {
final Document doc = context.docs().get(i);
doc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
doc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), Defaults.NESTED_FIELD_TYPE));
}
}
}
Expand All @@ -175,7 +176,7 @@ public boolean includeInObject() {

@Override
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
Field uid = new Field(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
Field uid = new XStringField(NAME, Uid.createUid(context.stringBuilder(), context.type(), context.id()), Defaults.FIELD_TYPE);
context.uid(uid);
fields.add(uid);
if (hasDocValues()) {
Expand Down
Expand Up @@ -21,6 +21,7 @@

import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermFilter;
Expand Down Expand Up @@ -432,12 +433,12 @@ public void parse(ParseContext context) throws IOException {
// we also rely on this for UidField#loadVersion

// this is a deeply nested field
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
nestedDoc.add(new XStringField(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
}
// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
nestedDoc.add(new Field(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
nestedDoc.add(new XStringField(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
restoreDoc = context.switchDoc(nestedDoc);
context.addDoc(nestedDoc);
}
Expand Down
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.analysis.ngram.NGramTokenizerFactory;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.XStringField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.highlight.Encoder;
Expand Down Expand Up @@ -66,7 +67,7 @@ protected Field[] getFields(IndexReader reader, int docId, String fieldName) thr
List<Object> values = lookup.source().extractRawValues(mapper.names().sourcePath());
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
fields[i] = new XStringField(mapper.names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
}
return fields;
}
Expand Down

0 comments on commit 23f200b

Please sign in to comment.