[PARSER] Clarify XContentParser/Builder interface for binary vs. utf8…

… values Today we have very confusing naming since some methods names claim to read binary but in fact read utf-16 and convert to utf-8 under the hood. This commit clarifies the interfaces and adds additional documentation. Closes #7367
elastic · Sep 8, 2014 · 5a225e8 · 5a225e8
1 parent a7b1fa2
commit 5a225e8
Show file tree

Hide file tree

Showing 10 changed files with 114 additions and 21 deletions.
diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java b/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java
@@ -512,6 +512,30 @@ public XContentBuilder field(XContentBuilderString name, BigDecimal value, int s
         return this;
     }
 
+    /**
+     * Writes the binary content of the given BytesRef
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
+    public XContentBuilder field(String name, BytesRef value) throws IOException {
+        field(name);
+        generator.writeBinary(value.bytes, value.offset, value.length);
+        return this;
+    }
+
+    /**
+     * Writes the binary content of the given BytesRef
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
+    public XContentBuilder field(XContentBuilderString name, BytesRef value) throws IOException {
+        field(name);
+        generator.writeBinary(value.bytes, value.offset, value.length);
+        return this;
+    }
+
+    /**
+     * Writes the binary content of the given BytesReference
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
     public XContentBuilder field(String name, BytesReference value) throws IOException {
         field(name);
         if (!value.hasArray()) {
@@ -521,6 +545,10 @@ public XContentBuilder field(String name, BytesReference value) throws IOExcepti
         return this;
     }
 
+    /**
+     * Writes the binary content of the given BytesReference
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
     public XContentBuilder field(XContentBuilderString name, BytesReference value) throws IOException {
         field(name);
         if (!value.hasArray()) {
@@ -530,12 +558,26 @@ public XContentBuilder field(XContentBuilderString name, BytesReference value) t
         return this;
     }
 
+    /**
+     * Writes the binary content of the given BytesRef as UTF-8 bytes
+     * Use {@link XContentParser#utf8Bytes()} to read the value back
+     */
     public XContentBuilder utf8Field(XContentBuilderString name, BytesRef value) throws IOException {
         field(name);
         generator.writeUTF8String(value.bytes, value.offset, value.length);
         return this;
     }
 
+    /**
+     * Writes the binary content of the given BytesRef as UTF-8 bytes
+     * Use {@link XContentParser#utf8Bytes()} to read the value back
+     */
+    public XContentBuilder utf8Field(String name, BytesRef value) throws IOException {
+        field(name);
+        generator.writeUTF8String(value.bytes, value.offset, value.length);
+        return this;
+    }
+
     public XContentBuilder field(String name, Text value) throws IOException {
         field(name);
         if (value.hasBytes() && value.bytes().hasArray()) {
@@ -989,6 +1031,22 @@ public XContentBuilder value(byte[] value, int offset, int length) throws IOExce
         return this;
     }
 
+    /**
+     * Writes the binary content of the given BytesRef
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
+    public XContentBuilder value(BytesRef value) throws IOException {
+        if (value == null) {
+            return nullValue();
+        }
+        generator.writeBinary(value.bytes, value.offset, value.length);
+        return this;
+    }
+
+    /**
+     * Writes the binary content of the given BytesReference
+     * Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
+     */
     public XContentBuilder value(BytesReference value) throws IOException {
         if (value == null) {
             return nullValue();
@@ -1173,6 +1231,9 @@ private void writeValue(Object value) throws IOException {
                 bytes = bytes.toBytesArray();
             }
             generator.writeBinary(bytes.array(), bytes.arrayOffset(), bytes.length());
+        } else if (value instanceof BytesRef) {
+            BytesRef bytes = (BytesRef) value;
+            generator.writeBinary(bytes.bytes, bytes.offset, bytes.length);
         } else if (value instanceof Text) {
             Text text = (Text) value;
             if (text.hasBytes() && text.bytes().hasArray()) {

diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java b/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java
@@ -22,7 +22,6 @@
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lease.Releasable;
 
-import java.io.Closeable;
 import java.io.IOException;
 import java.util.Map;
 
@@ -139,9 +138,17 @@ enum NumberType {
 
     String textOrNull() throws IOException;
 
-    BytesRef bytesOrNull() throws IOException;
+    /**
+     * Returns a BytesRef holding UTF-8 bytes or null if a null value is {@link Token#VALUE_NULL}.
+     * This method should be used to read text only binary content should be read through {@link #binaryValue()}
+     */
+    BytesRef utf8BytesOrNull() throws IOException;
 
-    BytesRef bytes() throws IOException;
+    /**
+     * Returns a BytesRef holding UTF-8 bytes.
+     * This method should be used to read text only binary content should be read through {@link #binaryValue()}
+     */
+    BytesRef utf8Bytes() throws IOException;
 
     Object objectText() throws IOException;
 
@@ -196,5 +203,32 @@ enum NumberType {
 
     boolean booleanValue() throws IOException;
 
+    /**
+     * Reads a plain binary value that was written via one of the following methods:
+     *
+     * <li>
+     *     <ul>{@link XContentBuilder#field(String, org.apache.lucene.util.BytesRef)}</ul>
+     *     <ul>{@link XContentBuilder#field(String, org.elasticsearch.common.bytes.BytesReference)}</ul>
+     *     <ul>{@link XContentBuilder#field(String, byte[], int, int)}}</ul>
+     *     <ul>{@link XContentBuilder#field(String, byte[])}}</ul>
+     * </li>
+     *
+     * as well as via their <code>XContentBuilderString</code> variants of the separated value methods.
+     * Note: Do not use this method to read values written with:
+     * <li>
+     *     <ul>{@link XContentBuilder#utf8Field(XContentBuilderString, org.apache.lucene.util.BytesRef)}</ul>
+     *     <ul>{@link XContentBuilder#utf8Field(String, org.apache.lucene.util.BytesRef)}</ul>
+     * </li>
+     *
+     * these methods write UTF-8 encoded strings and must be read through:
+     * <li>
+     *     <ul>{@link XContentParser#utf8Bytes()}</ul>
+     *     <ul>{@link XContentParser#utf8BytesOrNull()}}</ul>
+     *     <ul>{@link XContentParser#text()} ()}</ul>
+     *     <ul>{@link XContentParser#textOrNull()} ()}</ul>
+     *     <ul>{@link XContentParser#textCharacters()} ()}}</ul>
+     * </li>
+     *
+     */
     byte[] binaryValue() throws IOException;
 }
diff --git a/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java b/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java
@@ -22,6 +22,7 @@
 import com.fasterxml.jackson.core.JsonParser;
 import com.fasterxml.jackson.core.JsonToken;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.UnicodeUtil;
 import org.elasticsearch.ElasticsearchIllegalStateException;
 import org.elasticsearch.common.xcontent.XContentType;
@@ -86,7 +87,7 @@ public String text() throws IOException {
     }
 
     @Override
-    public BytesRef bytes() throws IOException {
+    public BytesRef utf8Bytes() throws IOException {
         BytesRef bytes = new BytesRef();
         UnicodeUtil.UTF16toUTF8(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength(), bytes);
         return bytes;
@@ -114,7 +115,7 @@ public Object objectText() throws IOException {
     public Object objectBytes() throws IOException {
         JsonToken currentToken = parser.getCurrentToken();
         if (currentToken == JsonToken.VALUE_STRING) {
-            return bytes();
+            return utf8Bytes();
         } else if (currentToken == JsonToken.VALUE_NUMBER_INT || currentToken == JsonToken.VALUE_NUMBER_FLOAT) {
             return parser.getNumberValue();
         } else if (currentToken == JsonToken.VALUE_TRUE) {
@@ -124,7 +125,8 @@ public Object objectBytes() throws IOException {
         } else if (currentToken == JsonToken.VALUE_NULL) {
             return null;
         } else {
-            return bytes();
+            //TODO should this really do UTF-8 conversion?
+            return utf8Bytes();
         }
     }
 
@@ -185,11 +187,7 @@ public byte[] binaryValue() throws IOException {
 
     @Override
     public void close() {
-        try {
-            parser.close();
-        } catch (IOException e) {
-            // ignore
-        }
+        IOUtils.closeWhileHandlingException(parser);
     }
 
     private NumberType convertNumberType(JsonParser.NumberType numberType) {

diff --git a/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java b/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java
@@ -196,11 +196,11 @@ public String textOrNull() throws IOException {
 
 
     @Override
-    public BytesRef bytesOrNull() throws IOException {
+    public BytesRef utf8BytesOrNull() throws IOException {
         if (currentToken() == Token.VALUE_NULL) {
             return null;
         }
-        return bytes();
+        return utf8Bytes();
     }
 
     @Override

diff --git a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java
@@ -289,7 +289,7 @@ public void parse(ParseContext context) throws IOException {
                         payload = payloadBuilder.bytes().toBytesRef();
                         payloadBuilder.close();
                     } else if (token.isValue()) {
-                        payload = parser.bytesOrNull();
+                        payload = parser.utf8BytesOrNull();
                     } else {
                         throw new MapperException("payload doesn't support type " + token);
                     }

diff --git a/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java b/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java
@@ -65,7 +65,7 @@ public Filter parse(QueryParseContext parseContext) throws IOException, QueryPar
                 if ("values".equals(currentFieldName)) {
                     idsProvided = true;
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        BytesRef value = parser.bytesOrNull();
+                        BytesRef value = parser.utf8BytesOrNull();
                         if (value == null) {
                             throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
                         }

diff --git a/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java b/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java
@@ -70,7 +70,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
                 if ("values".equals(currentFieldName)) {
                     idsProvided = true;
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        BytesRef value = parser.bytesOrNull();
+                        BytesRef value = parser.utf8BytesOrNull();
                         if (value == null) {
                             throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
                         }

diff --git a/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java b/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java
@@ -59,7 +59,7 @@ public Filter parse(QueryParseContext parseContext) throws IOException, QueryPar
         if (token != XContentParser.Token.VALUE_STRING) {
             throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name");
         }
-        BytesRef type = parser.bytes();
+        BytesRef type = parser.utf8Bytes();
         // move to the next token
         parser.nextToken();
 

diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java b/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java
@@ -61,7 +61,7 @@ public SuggestionSearchContext parseInternal(XContentParser parser, MapperServic
                 fieldName = parser.currentName();
             } else if (token.isValue()) {
                 if ("text".equals(fieldName)) {
-                    globalText = parser.bytes();
+                    globalText = parser.utf8Bytes();
                 } else {
                     throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
                 }
@@ -75,7 +75,7 @@ public SuggestionSearchContext parseInternal(XContentParser parser, MapperServic
                         fieldName = parser.currentName();
                     } else if (token.isValue()) {
                         if ("text".equals(fieldName)) {
-                            suggestText = parser.bytes();
+                            suggestText = parser.utf8Bytes();
                         } else {
                             throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
                         }

diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java
@@ -117,9 +117,9 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma
                             fieldName = parser.currentName();
                         } else if (token.isValue()) {
                             if ("pre_tag".equals(fieldName) || "preTag".equals(fieldName)) {
-                                suggestion.setPreTag(parser.bytes());
+                                suggestion.setPreTag(parser.utf8Bytes());
                             } else if ("post_tag".equals(fieldName) || "postTag".equals(fieldName)) {
-                                suggestion.setPostTag(parser.bytes());
+                                suggestion.setPostTag(parser.utf8Bytes());
                             } else {
                                 throw new ElasticsearchIllegalArgumentException(
                                     "suggester[phrase][highlight] doesn't support field [" + fieldName + "]");