Skip to content

Commit

Permalink
[PARSER] Clarify XContentParser/Builder interface for binary vs. utf8…
Browse files Browse the repository at this point in the history
… values

Today we have very confusing naming since some methods names claim to
read binary but in fact read utf-16 and convert to utf-8 under the hood.
This commit clarifies the interfaces and adds additional documentation.

Closes #7367
  • Loading branch information
s1monw authored and areek committed Sep 8, 2014
1 parent a7b1fa2 commit 5a225e8
Show file tree
Hide file tree
Showing 10 changed files with 114 additions and 21 deletions.
Expand Up @@ -512,6 +512,30 @@ public XContentBuilder field(XContentBuilderString name, BigDecimal value, int s
return this;
}

/**
* Writes the binary content of the given BytesRef
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder field(String name, BytesRef value) throws IOException {
field(name);
generator.writeBinary(value.bytes, value.offset, value.length);
return this;
}

/**
* Writes the binary content of the given BytesRef
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder field(XContentBuilderString name, BytesRef value) throws IOException {
field(name);
generator.writeBinary(value.bytes, value.offset, value.length);
return this;
}

/**
* Writes the binary content of the given BytesReference
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder field(String name, BytesReference value) throws IOException {
field(name);
if (!value.hasArray()) {
Expand All @@ -521,6 +545,10 @@ public XContentBuilder field(String name, BytesReference value) throws IOExcepti
return this;
}

/**
* Writes the binary content of the given BytesReference
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder field(XContentBuilderString name, BytesReference value) throws IOException {
field(name);
if (!value.hasArray()) {
Expand All @@ -530,12 +558,26 @@ public XContentBuilder field(XContentBuilderString name, BytesReference value) t
return this;
}

/**
* Writes the binary content of the given BytesRef as UTF-8 bytes
* Use {@link XContentParser#utf8Bytes()} to read the value back
*/
public XContentBuilder utf8Field(XContentBuilderString name, BytesRef value) throws IOException {
field(name);
generator.writeUTF8String(value.bytes, value.offset, value.length);
return this;
}

/**
* Writes the binary content of the given BytesRef as UTF-8 bytes
* Use {@link XContentParser#utf8Bytes()} to read the value back
*/
public XContentBuilder utf8Field(String name, BytesRef value) throws IOException {
field(name);
generator.writeUTF8String(value.bytes, value.offset, value.length);
return this;
}

public XContentBuilder field(String name, Text value) throws IOException {
field(name);
if (value.hasBytes() && value.bytes().hasArray()) {
Expand Down Expand Up @@ -989,6 +1031,22 @@ public XContentBuilder value(byte[] value, int offset, int length) throws IOExce
return this;
}

/**
* Writes the binary content of the given BytesRef
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder value(BytesRef value) throws IOException {
if (value == null) {
return nullValue();
}
generator.writeBinary(value.bytes, value.offset, value.length);
return this;
}

/**
* Writes the binary content of the given BytesReference
* Use {@link org.elasticsearch.common.xcontent.XContentParser#binaryValue()} to read the value back
*/
public XContentBuilder value(BytesReference value) throws IOException {
if (value == null) {
return nullValue();
Expand Down Expand Up @@ -1173,6 +1231,9 @@ private void writeValue(Object value) throws IOException {
bytes = bytes.toBytesArray();
}
generator.writeBinary(bytes.array(), bytes.arrayOffset(), bytes.length());
} else if (value instanceof BytesRef) {
BytesRef bytes = (BytesRef) value;
generator.writeBinary(bytes.bytes, bytes.offset, bytes.length);
} else if (value instanceof Text) {
Text text = (Text) value;
if (text.hasBytes() && text.bytes().hasArray()) {
Expand Down
Expand Up @@ -22,7 +22,6 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lease.Releasable;

import java.io.Closeable;
import java.io.IOException;
import java.util.Map;

Expand Down Expand Up @@ -139,9 +138,17 @@ enum NumberType {

String textOrNull() throws IOException;

BytesRef bytesOrNull() throws IOException;
/**
* Returns a BytesRef holding UTF-8 bytes or null if a null value is {@link Token#VALUE_NULL}.
* This method should be used to read text only binary content should be read through {@link #binaryValue()}
*/
BytesRef utf8BytesOrNull() throws IOException;

BytesRef bytes() throws IOException;
/**
* Returns a BytesRef holding UTF-8 bytes.
* This method should be used to read text only binary content should be read through {@link #binaryValue()}
*/
BytesRef utf8Bytes() throws IOException;

Object objectText() throws IOException;

Expand Down Expand Up @@ -196,5 +203,32 @@ enum NumberType {

boolean booleanValue() throws IOException;

/**
* Reads a plain binary value that was written via one of the following methods:
*
* <li>
* <ul>{@link XContentBuilder#field(String, org.apache.lucene.util.BytesRef)}</ul>
* <ul>{@link XContentBuilder#field(String, org.elasticsearch.common.bytes.BytesReference)}</ul>
* <ul>{@link XContentBuilder#field(String, byte[], int, int)}}</ul>
* <ul>{@link XContentBuilder#field(String, byte[])}}</ul>
* </li>
*
* as well as via their <code>XContentBuilderString</code> variants of the separated value methods.
* Note: Do not use this method to read values written with:
* <li>
* <ul>{@link XContentBuilder#utf8Field(XContentBuilderString, org.apache.lucene.util.BytesRef)}</ul>
* <ul>{@link XContentBuilder#utf8Field(String, org.apache.lucene.util.BytesRef)}</ul>
* </li>
*
* these methods write UTF-8 encoded strings and must be read through:
* <li>
* <ul>{@link XContentParser#utf8Bytes()}</ul>
* <ul>{@link XContentParser#utf8BytesOrNull()}}</ul>
* <ul>{@link XContentParser#text()} ()}</ul>
* <ul>{@link XContentParser#textOrNull()} ()}</ul>
* <ul>{@link XContentParser#textCharacters()} ()}}</ul>
* </li>
*
*/
byte[] binaryValue() throws IOException;
}
Expand Up @@ -22,6 +22,7 @@
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.xcontent.XContentType;
Expand Down Expand Up @@ -86,7 +87,7 @@ public String text() throws IOException {
}

@Override
public BytesRef bytes() throws IOException {
public BytesRef utf8Bytes() throws IOException {
BytesRef bytes = new BytesRef();
UnicodeUtil.UTF16toUTF8(parser.getTextCharacters(), parser.getTextOffset(), parser.getTextLength(), bytes);
return bytes;
Expand Down Expand Up @@ -114,7 +115,7 @@ public Object objectText() throws IOException {
public Object objectBytes() throws IOException {
JsonToken currentToken = parser.getCurrentToken();
if (currentToken == JsonToken.VALUE_STRING) {
return bytes();
return utf8Bytes();
} else if (currentToken == JsonToken.VALUE_NUMBER_INT || currentToken == JsonToken.VALUE_NUMBER_FLOAT) {
return parser.getNumberValue();
} else if (currentToken == JsonToken.VALUE_TRUE) {
Expand All @@ -124,7 +125,8 @@ public Object objectBytes() throws IOException {
} else if (currentToken == JsonToken.VALUE_NULL) {
return null;
} else {
return bytes();
//TODO should this really do UTF-8 conversion?
return utf8Bytes();
}
}

Expand Down Expand Up @@ -185,11 +187,7 @@ public byte[] binaryValue() throws IOException {

@Override
public void close() {
try {
parser.close();
} catch (IOException e) {
// ignore
}
IOUtils.closeWhileHandlingException(parser);
}

private NumberType convertNumberType(JsonParser.NumberType numberType) {
Expand Down
Expand Up @@ -196,11 +196,11 @@ public String textOrNull() throws IOException {


@Override
public BytesRef bytesOrNull() throws IOException {
public BytesRef utf8BytesOrNull() throws IOException {
if (currentToken() == Token.VALUE_NULL) {
return null;
}
return bytes();
return utf8Bytes();
}

@Override
Expand Down
Expand Up @@ -289,7 +289,7 @@ public void parse(ParseContext context) throws IOException {
payload = payloadBuilder.bytes().toBytesRef();
payloadBuilder.close();
} else if (token.isValue()) {
payload = parser.bytesOrNull();
payload = parser.utf8BytesOrNull();
} else {
throw new MapperException("payload doesn't support type " + token);
}
Expand Down
Expand Up @@ -65,7 +65,7 @@ public Filter parse(QueryParseContext parseContext) throws IOException, QueryPar
if ("values".equals(currentFieldName)) {
idsProvided = true;
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
BytesRef value = parser.bytesOrNull();
BytesRef value = parser.utf8BytesOrNull();
if (value == null) {
throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
}
Expand Down
Expand Up @@ -70,7 +70,7 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
if ("values".equals(currentFieldName)) {
idsProvided = true;
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
BytesRef value = parser.bytesOrNull();
BytesRef value = parser.utf8BytesOrNull();
if (value == null) {
throw new QueryParsingException(parseContext.index(), "No value specified for term filter");
}
Expand Down
Expand Up @@ -59,7 +59,7 @@ public Filter parse(QueryParseContext parseContext) throws IOException, QueryPar
if (token != XContentParser.Token.VALUE_STRING) {
throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name");
}
BytesRef type = parser.bytes();
BytesRef type = parser.utf8Bytes();
// move to the next token
parser.nextToken();

Expand Down
Expand Up @@ -61,7 +61,7 @@ public SuggestionSearchContext parseInternal(XContentParser parser, MapperServic
fieldName = parser.currentName();
} else if (token.isValue()) {
if ("text".equals(fieldName)) {
globalText = parser.bytes();
globalText = parser.utf8Bytes();
} else {
throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
}
Expand All @@ -75,7 +75,7 @@ public SuggestionSearchContext parseInternal(XContentParser parser, MapperServic
fieldName = parser.currentName();
} else if (token.isValue()) {
if ("text".equals(fieldName)) {
suggestText = parser.bytes();
suggestText = parser.utf8Bytes();
} else {
throw new ElasticsearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
}
Expand Down
Expand Up @@ -117,9 +117,9 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma
fieldName = parser.currentName();
} else if (token.isValue()) {
if ("pre_tag".equals(fieldName) || "preTag".equals(fieldName)) {
suggestion.setPreTag(parser.bytes());
suggestion.setPreTag(parser.utf8Bytes());
} else if ("post_tag".equals(fieldName) || "postTag".equals(fieldName)) {
suggestion.setPostTag(parser.bytes());
suggestion.setPostTag(parser.utf8Bytes());
} else {
throw new ElasticsearchIllegalArgumentException(
"suggester[phrase][highlight] doesn't support field [" + fieldName + "]");
Expand Down

0 comments on commit 5a225e8

Please sign in to comment.