Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multi-valued text support to the analyzer API #10847

Closed
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/reference/indices/analyze.asciidoc
Expand Up @@ -18,6 +18,19 @@ curl -XGET 'localhost:9200/_analyze' -d '

coming[2.0.0, body based parameters were added in 2.0.0]

If the `text` parameter is provided as an array of strings, it is analyzed as a multi-valued field.

[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/_analyze' -d '
{
"analyzer" : "standard",
"text" : ["this is a test", "the second text"]
}'
--------------------------------------------------

coming[2.0.0, body based parameters were added in 2.0.0]

Or by building a custom transient analyzer out of tokenizers,
token filters and char filters. Token filters can use the shorter 'filters'
parameter name:
Expand Down
2 changes: 1 addition & 1 deletion rest-api-spec/api/indices.analyze.json
Expand Up @@ -41,7 +41,7 @@
"description" : "With `true`, specify that a local shard should be used if available, with `false`, use a random shard (default: true)"
},
"text": {
"type" : "string",
"type" : "list",
"description" : "The text on which the analysis should be performed (when request body is not used)"
},
"tokenizer": {
Expand Down
8 changes: 8 additions & 0 deletions rest-api-spec/test/indices.analyze/10_analyze.yaml
Expand Up @@ -63,3 +63,11 @@ setup:
body: { "text": "Bar Foo", "filters": ["lowercase"], "tokenizer": keyword }
- length: {tokens: 1 }
- match: { tokens.0.token: bar foo }
---
"Array text":
- do:
indices.analyze:
body: { "text": ["Foo Bar", "Baz"], "filters": ["lowercase"], "tokenizer": keyword }
- length: {tokens: 2 }
- match: { tokens.0.token: foo bar }
- match: { tokens.1.token: baz }
Expand Up @@ -36,7 +36,7 @@
*/
public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest> {

private String text;
private String[] text;

private String analyzer;

Expand All @@ -61,11 +61,11 @@ public AnalyzeRequest(String index) {
this.index(index);
}

public String text() {
public String[] text() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this breaks the java api :) we have to mark as breaking then and push only to master (which was the case already I think)

return this.text;
}

public AnalyzeRequest text(String text) {
public AnalyzeRequest text(String... text) {
this.text = text;
return this;
}
Expand Down Expand Up @@ -118,7 +118,7 @@ public String field() {
@Override
public ActionRequestValidationException validate() {
ActionRequestValidationException validationException = super.validate();
if (text == null) {
if (text == null || text.length == 0) {
validationException = addValidationError("text is missing", validationException);
}
if (tokenFilters == null) {
Expand All @@ -133,7 +133,7 @@ public ActionRequestValidationException validate() {
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
text = in.readString();
text = in.readStringArray();
analyzer = in.readOptionalString();
tokenizer = in.readOptionalString();
tokenFilters = in.readStringArray();
Expand All @@ -144,7 +144,7 @@ public void readFrom(StreamInput in) throws IOException {
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
out.writeString(text);
out.writeStringArray(text);
out.writeOptionalString(analyzer);
out.writeOptionalString(tokenizer);
out.writeStringArray(tokenFilters);
Expand Down
Expand Up @@ -31,7 +31,7 @@ public AnalyzeRequestBuilder(IndicesAdminClient indicesClient) {
super(indicesClient, new AnalyzeRequest());
}

public AnalyzeRequestBuilder(IndicesAdminClient indicesClient, String index, String text) {
/**
 * Creates a builder for analyzing one or more texts.
 *
 * @param indicesClient the indices admin client the request is executed through
 * @param index the index whose analysis chain should be used (passed to {@link AnalyzeRequest})
 * @param text one or more texts to analyze; multiple values are treated as a multi-valued field
 */
public AnalyzeRequestBuilder(IndicesAdminClient indicesClient, String index, String... text) {
    super(indicesClient, new AnalyzeRequest(index).text(text));
}

Expand Down
Expand Up @@ -25,10 +25,10 @@
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.single.custom.TransportSingleCustomOperationAction;
import org.elasticsearch.cluster.ClusterService;
Expand Down Expand Up @@ -212,36 +212,41 @@ protected AnalyzeResponse shardOperation(AnalyzeRequest request, ShardId shardId

List<AnalyzeResponse.AnalyzeToken> tokens = Lists.newArrayList();
TokenStream stream = null;
try {
stream = analyzer.tokenStream(field, request.text());
stream.reset();
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
TypeAttribute type = stream.addAttribute(TypeAttribute.class);

int position = -1;
while (stream.incrementToken()) {
int increment = posIncr.getPositionIncrement();
if (increment > 0) {
position = position + increment;
}
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), position, offset.startOffset(), offset.endOffset(), type.type()));
}
stream.end();
} catch (IOException e) {
throw new ElasticsearchException("failed to analyze", e);
} finally {
if (stream != null) {
try {
stream.close();
} catch (IOException e) {
// ignore
int lastPosition = -1;
int lastOffset = 0;
for (String text : request.text()) {
try {
stream = analyzer.tokenStream(field, text);
stream.reset();
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
TypeAttribute type = stream.addAttribute(TypeAttribute.class);

while (stream.incrementToken()) {
int increment = posIncr.getPositionIncrement();
if (increment > 0) {
lastPosition = lastPosition + increment;
}
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type()));

}
stream.end();
lastOffset += offset.endOffset();
lastPosition += posIncr.getPositionIncrement();

lastPosition += analyzer.getPositionIncrementGap(field);
lastOffset += analyzer.getOffsetGap(field);

} catch (IOException e) {
throw new ElasticsearchException("failed to analyze", e);
} finally {
IOUtils.closeWhileHandlingException(stream);
}
if (closeAnalyzer) {
analyzer.close();
}
}

if (closeAnalyzer) {
analyzer.close();
}

return new AnalyzeResponse(tokens);
Expand Down
15 changes: 15 additions & 0 deletions src/main/java/org/elasticsearch/client/IndicesAdminClient.java
Expand Up @@ -587,6 +587,21 @@ public interface IndicesAdminClient extends ElasticsearchClient<IndicesAdminClie
*/
AnalyzeRequestBuilder prepareAnalyze(String text);

/**
* Analyze texts under the provided index.
*
* @param index The index name
* @param text The array of text to analyze
*/
AnalyzeRequestBuilder prepareAnalyzeWithIndexAndMultiValued(@Nullable String index, String... text);

/**
* Analyze texts.
*
* @param text The array of text to analyze
*/
AnalyzeRequestBuilder prepareAnalyzeWithMultiValued(String... text);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these two methods go a bit against the naming convention that we have here for prepare* methods. I really think we should go for one of the options I mentioned here. What do you think @dakrone ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, sorry, and I completely agree about the naming convention.
Other question.
If we can change I/F about prepareAnalyze, what do you think about dropping all text parameters for I/F simplification and adding the setter of text/texts to AnalyzeRequestBuilder?

Of course, it would be hard to tell the difference between prepareAnalyze(String text), which is the old version, and prepareAnalyze(String index), which is the new one.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do you think about dropping all text parameters for I/F simplification and adding the setter of text/texts to AnalyzeRequestBuilder?

that is an option too. For now I'd just expose a prepareAnalyze() then with no parameters, leave the existing ones for bw comp, and add the needed setters where needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now, AnalyzeRequestBuilder doesn't have methods related text parameters.
If we have prepareAnalyze(), we should add setText(String)/setText(String...) to AnalyzeRequestBuilder.java


/**
* Puts an index template.
*/
Expand Down
Expand Up @@ -480,6 +480,16 @@ public AnalyzeRequestBuilder prepareAnalyze(String text) {
return new AnalyzeRequestBuilder(this, null, text);
}

@Override
public AnalyzeRequestBuilder prepareAnalyzeWithIndexAndMultiValued(@Nullable String index, String... text) {
    // Delegate to the builder constructor that accepts an (optionally null) index and varargs texts.
    final AnalyzeRequestBuilder builder = new AnalyzeRequestBuilder(this, index, text);
    return builder;
}

@Override
public AnalyzeRequestBuilder prepareAnalyzeWithMultiValued(String... text) {
    // No index targeted: pass null so the request is not bound to a specific index.
    final String noIndex = null;
    return new AnalyzeRequestBuilder(this, noIndex, text);
}

@Override
public ActionFuture<PutIndexTemplateResponse> putTemplate(final PutIndexTemplateRequest request) {
return execute(PutIndexTemplateAction.INSTANCE, request);
Expand Down
Expand Up @@ -23,11 +23,10 @@
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
Expand Down Expand Up @@ -58,10 +57,10 @@ public RestAnalyzeAction(Settings settings, RestController controller, Client cl
@Override
public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) {

String text = request.param("text");
String[] texts = request.paramAsStringArrayOrEmptyIfAll("text");

AnalyzeRequest analyzeRequest = new AnalyzeRequest(request.param("index"));
analyzeRequest.text(text);
analyzeRequest.text(texts);
analyzeRequest.listenerThreaded(false);
analyzeRequest.preferLocal(request.paramAsBoolean("prefer_local", analyzeRequest.preferLocalShard()));
analyzeRequest.analyzer(request.param("analyzer"));
Expand All @@ -73,9 +72,9 @@ public void handleRequest(final RestRequest request, final RestChannel channel,
if (RestActions.hasBodyContent(request)) {
XContentType type = RestActions.guessBodyContentType(request);
if (type == null) {
if (text == null) {
text = RestActions.getRestContent(request).toUtf8();
analyzeRequest.text(text);
if (texts == null || texts.length == 0) {
texts = new String[]{ RestActions.getRestContent(request).toUtf8() };
analyzeRequest.text(texts);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: we seem to support a single value only here. What is the difference between this case and the below case where we check the presence of arrays? It is just me not knowing tha analyze api I guess.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question.

Analyze API has three way to receive text.

  1. request parameter
  2. request body, raw text
  3. request body, JSON parameter

This case is 2.
In this case, I think it is difficult to decide which character we should treat as a delimiter.
And we leave this case for backward compatibility.

Then, I think it's OK that we don't support multi-valued text for this case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for the explanation. Case 2) is the same as case 1), I guess. The only way we could make it work with multiple values would be by allowing comma-separated values, but then the comma couldn't be part of the text anymore. Let's leave it as-is.

}
} else {
// NOTE: if rest request with xcontent body has request parameters, the parameters does not override xcontent values
Expand All @@ -99,7 +98,16 @@ public static void buildFromContent(BytesReference content, AnalyzeRequest analy
} else if ("prefer_local".equals(currentFieldName) && token == XContentParser.Token.VALUE_BOOLEAN) {
analyzeRequest.preferLocal(parser.booleanValue());
} else if ("text".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.text(parser.text());
analyzeRequest.text(parser.text());
} else if ("text".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
List<String> texts = Lists.newArrayList();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token.isValue() == false) {
throw new ElasticsearchIllegalArgumentException(currentFieldName + " array element should only contain text");
}
texts.add(parser.text());
}
analyzeRequest.text(texts.toArray(Strings.EMPTY_ARRAY));
} else if ("analyzer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.analyzer(parser.text());
} else if ("field".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
Expand All @@ -114,7 +122,7 @@ public static void buildFromContent(BytesReference content, AnalyzeRequest analy
}
filters.add(parser.text());
}
analyzeRequest.tokenFilters(filters.toArray(new String[0]));
analyzeRequest.tokenFilters(filters.toArray(Strings.EMPTY_ARRAY));
} else if ("char_filters".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
List<String> charFilters = Lists.newArrayList();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
Expand All @@ -123,7 +131,7 @@ public static void buildFromContent(BytesReference content, AnalyzeRequest analy
}
charFilters.add(parser.text());
}
analyzeRequest.tokenFilters(charFilters.toArray(new String[0]));
analyzeRequest.tokenFilters(charFilters.toArray(Strings.EMPTY_ARRAY));
} else {
throw new ElasticsearchIllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
}
Expand Down
Expand Up @@ -221,7 +221,8 @@ public void testParseXContentForAnalyzeReuqest() throws Exception {

RestAnalyzeAction.buildFromContent(content, analyzeRequest);

assertThat(analyzeRequest.text(), equalTo("THIS IS A TEST"));
assertThat(analyzeRequest.text().length, equalTo(1));
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
assertThat(analyzeRequest.tokenizer(), equalTo("keyword"));
assertThat(analyzeRequest.tokenFilters(), equalTo(new String[]{"lowercase"}));
}
Expand All @@ -240,7 +241,6 @@ public void testParseXContentForAnalyzeRequestWithInvalidJsonThrowsException() t
}
}


@Test
public void testParseXContentForAnalyzeRequestWithUnknownParamThrowsException() throws Exception {
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
Expand All @@ -259,4 +259,46 @@ public void testParseXContentForAnalyzeRequestWithUnknownParamThrowsException()
}
}

@Test
public void analyzerWithMultiValues() throws Exception {

assertAcked(prepareCreate("test").addAlias(new Alias("alias")));
ensureGreen();

client().admin().indices().preparePutMapping("test")
.setType("document").setSource(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.setSource("simple", "type=string,analyzer=simple,position_offset_gap=100") ? so we can remove json provided as a string?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I didn't know that. I will change it to a string.

"{\n" +
" \"document\":{\n" +
" \"properties\":{\n" +
" \"simple\":{\n" +
" \"type\":\"string\",\n" +
" \"analyzer\": \"simple\",\n" +
" \"position_offset_gap\": 100\n" +
" }\n" +
" }\n" +
" }\n" +
"}"
).get();

String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"};

final AnalyzeRequestBuilder requestBuilder = client().admin().indices().prepareAnalyzeWithMultiValued(texts);
requestBuilder.setIndex(indexOrAlias());
requestBuilder.setField("simple");
AnalyzeResponse analyzeResponse = requestBuilder.get();
assertThat(analyzeResponse.getTokens().size(), equalTo(7));
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(3);
assertThat(token.getTerm(), equalTo("test"));
assertThat(token.getPosition(), equalTo(3));
assertThat(token.getStartOffset(), equalTo(10));
assertThat(token.getEndOffset(), equalTo(14));

token = analyzeResponse.getTokens().get(5);
assertThat(token.getTerm(), equalTo("second"));
assertThat(token.getPosition(), equalTo(105));
assertThat(token.getStartOffset(), equalTo(19));
assertThat(token.getEndOffset(), equalTo(25));

}

}