Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return term vectors as part of the search response #10729

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/reference/search/request-body.asciidoc
Expand Up @@ -107,6 +107,8 @@ include::request/script-fields.asciidoc[]

include::request/fielddata-fields.asciidoc[]

include::request/termvectors.asciidoc[]

include::request/post-filter.asciidoc[]

include::request/highlighting.asciidoc[]
Expand Down
30 changes: 30 additions & 0 deletions docs/reference/search/request/termvectors.asciidoc
@@ -0,0 +1,30 @@
[[search-request-termvectors]]
=== Term Vectors

Returns the term vectors for each search hit. For example:

[source,js]
--------------------------------------------------
{
    "query" : {
        ...
    },
    "term_vectors" : {
        "fields" : ["text"],
        "offsets" : true,
        "payloads" : true,
        "positions" : true,
        "term_statistics" : true,
        "field_statistics" : true
    }
}
--------------------------------------------------

The parameters are the same as for the <<docs-termvectors,Term Vectors API>>.
Use `"term_vectors": true` with no parameters to return only the term vectors
stored for each document hit. This ensures that term vectors which are not
stored will not be computed on the fly.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so, when I do not want term vectors to be generated on the fly if they are not there, then I cannot configure any options?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you'd do "term_vectors" : true, which will return the stored term vectors only.


[NOTE]
Parameters such as `_index`, `_type`, `_id`, `doc`, `_routing`,
`_version` and `_version_type` are not allowed.
35 changes: 35 additions & 0 deletions rest-api-spec/test/search/60_term_vectors.yaml
@@ -0,0 +1,35 @@
---
"Term Vectors":

  # Create an index whose "text" field stores term vectors with
  # positions and offsets, so the fetch phase can return them without
  # recomputing them on the fly.
  - do:
      indices.create:
        index: test
        body:
          mappings:
            type:
              "properties":
                "text":
                  "type" : "string"
                  "term_vector" : "with_positions_offsets"
  - do:
      index:
        index: test
        type: type
        id: 1
        body:
          "text" : "The quick brown fox is brown."
  - do:
      indices.refresh: {}

  # Ask the search request to return term vectors (with term statistics)
  # for every hit alongside the normal hit fields.
  - do:
      search:
        index: test
        type: type
        body:
          term_vectors: { term_statistics: true }
          query: { match_all: {} }

  # "The quick brown fox is brown." -> 5 unique terms, each in 1 doc,
  # so sum_doc_freq is 5; "brown" first occurs at character offset 10.
  - match: { hits.total: 1 }
  - match: { hits.hits.0.term_vectors.text.field_statistics.sum_doc_freq: 5 }
  - match: { hits.hits.0.term_vectors.text.terms.brown.doc_freq: 1 }
  - match: { hits.hits.0.term_vectors.text.terms.brown.tokens.0.start_offset: 10 }
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.innerhits.InnerHitsBuilder;
import org.elasticsearch.search.fetch.termvectors.TermVectorsBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.rescore.RescoreBuilder;
import org.elasticsearch.search.sort.SortBuilder;
Expand Down Expand Up @@ -970,6 +971,22 @@ public SearchRequestBuilder setQueryCache(Boolean queryCache) {
return this;
}

/**
 * Sets whether the stored term vectors should be returned for every hit,
 * disregarding any previously configured term vector parameters.
 *
 * @param fetch {@code true} to fetch only the stored term vectors for each hit
 * @return this builder, for method chaining
 */
public SearchRequestBuilder setTermVectors(boolean fetch) {
    SearchSourceBuilder source = sourceBuilder();
    source.termVectors(fetch);
    return this;
}

/**
 * Configures how term vectors should be fetched for each hit, using the
 * given pre-configured builder.
 *
 * @param termVectorsBuilder the term vector fetch options to apply per hit
 * @return this builder, for method chaining
 */
public SearchRequestBuilder setTermVectors(TermVectorsBuilder termVectorsBuilder) {
    SearchSourceBuilder source = sourceBuilder();
    source.termVectors(termVectorsBuilder);
    return this;
}

/**
* Sets the source builder to be used with this request. Note, any operations done
* on this require builder before are discarded as this internal builder replaces
Expand Down
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.uid.Versions;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.VersionType;
Expand Down Expand Up @@ -77,7 +78,7 @@ public class TermVectorsRequest extends SingleShardOperationRequest<TermVectorsR

private FilterSettings filterSettings;

public static final class FilterSettings {
public static final class FilterSettings implements ToXContent {
public Integer maxNumTerms;
public Integer minTermFreq;
public Integer maxTermFreq;
Expand Down Expand Up @@ -121,6 +122,33 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeOptionalVInt(minWordLength);
out.writeOptionalVInt(maxWordLength);
}

/**
 * Serializes these filter settings as a {@code "filter"} object, emitting
 * only the settings that have been explicitly set (non-null).
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.startObject("filter");
    emitIfSet(builder, "max_num_terms", maxNumTerms);
    emitIfSet(builder, "min_term_freq", minTermFreq);
    emitIfSet(builder, "max_term_freq", maxTermFreq);
    emitIfSet(builder, "min_doc_freq", minDocFreq);
    emitIfSet(builder, "max_doc_freq", maxDocFreq);
    emitIfSet(builder, "min_word_length", minWordLength);
    emitIfSet(builder, "max_word_length", maxWordLength);
    builder.endObject();
    return builder;
}

/** Writes {@code name: value} only when the value has been set. */
private static void emitIfSet(XContentBuilder builder, String name, Integer value) throws IOException {
    if (value != null) {
        builder.field(name, value);
    }
}
}

private EnumSet<Flag> flagsEnum = EnumSet.of(Flag.Positions, Flag.Offsets, Flag.Payloads,
Expand Down Expand Up @@ -574,14 +602,17 @@ public static enum Flag {
/**
* populates a request object (pre-populated with defaults) based on a parser.
*/
public static void parseRequest(TermVectorsRequest termVectorsRequest, XContentParser parser) throws IOException {
public static void parseRequest(TermVectorsRequest termVectorsRequest, XContentParser parser, @Nullable Set<String> disallowedParameters) throws IOException {
XContentParser.Token token;
String currentFieldName = null;
List<String> fields = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (currentFieldName != null) {
if (disallowedParameters != null && disallowedParameters.contains(currentFieldName)) {
throw new ElasticsearchParseException("The parameter \"" + currentFieldName + "\" is not allowed!");
}
if (currentFieldName.equals("fields")) {
if (token == XContentParser.Token.START_ARRAY) {
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
Expand Down Expand Up @@ -625,7 +656,8 @@ public static void parseRequest(TermVectorsRequest termVectorsRequest, XContentP
termVectorsRequest.routing = parser.text();
} else if ("_version".equals(currentFieldName) || "version".equals(currentFieldName)) {
termVectorsRequest.version = parser.longValue();
} else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName) || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) {
} else if ("_version_type".equals(currentFieldName) || "version_type".equals(currentFieldName) ||
"_versionType".equals(currentFieldName) || "versionType".equals(currentFieldName)) {
termVectorsRequest.versionType = VersionType.fromString(parser.text());
} else {
throw new ElasticsearchParseException("The parameter " + currentFieldName
Expand All @@ -639,6 +671,10 @@ public static void parseRequest(TermVectorsRequest termVectorsRequest, XContentP
}
}

/**
 * Populates a request object (pre-populated with defaults) based on a parser,
 * with no restrictions on which parameters may appear (delegates with a
 * {@code null} disallowed-parameter set).
 */
public static void parseRequest(TermVectorsRequest termVectorsRequest, XContentParser parser) throws IOException {
parseRequest(termVectorsRequest, parser, null);
}

private static Map<String, String> readPerFieldAnalyzer(Map<String, Object> map) {
Map<String, String> mapStrStr = new HashMap<>();
for (Map.Entry<String, Object> e : map.entrySet()) {
Expand Down
Expand Up @@ -102,7 +102,7 @@ public TermVectorsResponse(String index, String type, String id) {
this.id = id;
}

TermVectorsResponse() {
// No-arg constructor for creating an empty response to be filled in later
// (e.g. via stream deserialization).
// NOTE(review): widened from package-private to public in this change —
// presumably so code outside this package (the new term vectors fetch
// sub-phase) can instantiate it; confirm against the caller.
public TermVectorsResponse() {
}

@Override
Expand Down Expand Up @@ -184,9 +184,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(FieldStrings._VERSION, docVersion);
builder.field(FieldStrings.FOUND, isExists());
builder.field(FieldStrings.TOOK, tookInMillis);
if (!isExists()) {
return builder;
if (isExists()) {
buildTermVectors(builder);
}
return builder;
}

public void buildTermVectors(XContentBuilder builder) throws IOException {
builder.startObject(FieldStrings.TERM_VECTORS);
final CharsRefBuilder spare = new CharsRefBuilder();
Fields theFields = getFields();
Expand All @@ -195,7 +199,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
buildField(builder, spare, theFields, fieldIter);
}
builder.endObject();
return builder;
}

private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/org/elasticsearch/search/SearchHit.java
Expand Up @@ -21,6 +21,7 @@

import org.apache.lucene.search.Explanation;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.termvectors.TermVectorsResponse;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.Streamable;
import org.elasticsearch.common.text.Text;
Expand Down Expand Up @@ -226,4 +227,9 @@ public interface NestedIdentity {
*/
public NestedIdentity getChild();
}

/**
 * The term vectors fetched for this hit, when the search request asked for
 * them via the {@code term_vectors} section.
 * NOTE(review): presumably {@code null} when term vectors were not
 * requested — confirm against the fetch sub-phase implementation.
 */
TermVectorsResponse getTermVectorsResponse();
}
Expand Up @@ -40,6 +40,7 @@
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.fetch.innerhits.InnerHitsBuilder;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import org.elasticsearch.search.fetch.termvectors.TermVectorsBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.rescore.RescoreBuilder;
Expand Down Expand Up @@ -123,7 +124,7 @@ public static HighlightBuilder highlight() {

private String[] stats;


private TermVectorsBuilder termVectorsBuilder;
/**
* Constructs a new search source builder.
*/
Expand Down Expand Up @@ -623,6 +624,25 @@ public SearchSourceBuilder stats(String... statsGroups) {
return this;
}

/**
 * Specifies whether to return the stored term vectors for each hit, disregarding any previous parameters.
 *
 * @param fetch {@code true} to return only the stored term vectors for each hit
 * @return this builder, for method chaining
 */
public SearchSourceBuilder termVectors(boolean fetch) {
    // Always start from a fresh builder so that, as documented, any
    // previously configured term vector parameters are disregarded.
    // (Reusing an existing builder here would silently keep its other
    // settings alongside the fetch-only flag.)
    TermVectorsBuilder builder = new TermVectorsBuilder();
    builder.setFetchOnly(fetch);
    this.termVectorsBuilder = builder;
    return this;
}

/**
 * Specifies how term vectors should be fetched for each hit.
 * Overwrites any builder previously set, including one implicitly created
 * by {@link #termVectors(boolean)}.
 */
public SearchSourceBuilder termVectors(TermVectorsBuilder termVectorsBuilder) {
this.termVectorsBuilder = termVectorsBuilder;
return this;
}

@Override
public String toString() {
try {
Expand Down Expand Up @@ -854,6 +874,10 @@ public void innerToXContent(XContentBuilder builder, Params params) throws IOExc
}
builder.endArray();
}

if (termVectorsBuilder != null) {
termVectorsBuilder.toXContent(builder, params);
}
}

private static class ScriptField {
Expand Down
14 changes: 4 additions & 10 deletions src/main/java/org/elasticsearch/search/fetch/FetchPhase.java
Expand Up @@ -21,7 +21,6 @@

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.DocIdSetIterator;
Expand Down Expand Up @@ -56,22 +55,17 @@
import org.elasticsearch.search.fetch.script.ScriptFieldsFetchSubPhase;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import org.elasticsearch.search.fetch.source.FetchSourceSubPhase;
import org.elasticsearch.search.fetch.termvectors.TermVectorsFetchSubPhase;
import org.elasticsearch.search.fetch.version.VersionFetchSubPhase;
import org.elasticsearch.search.highlight.HighlightPhase;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.InternalSearchHitField;
import org.elasticsearch.search.internal.InternalSearchHits;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.LeafSearchLookup;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;

import static com.google.common.collect.Lists.newArrayList;
import static org.elasticsearch.common.xcontent.XContentFactory.contentBuilder;
Expand All @@ -87,10 +81,10 @@ public class FetchPhase implements SearchPhase {
/**
 * Wires together all fetch sub-phases that run over each search hit,
 * including the new term vectors sub-phase.
 */
public FetchPhase(HighlightPhase highlightPhase, ScriptFieldsFetchSubPhase scriptFieldsPhase,
                  MatchedQueriesFetchSubPhase matchedQueriesPhase, ExplainFetchSubPhase explainPhase, VersionFetchSubPhase versionPhase,
                  FetchSourceSubPhase fetchSourceSubPhase, FieldDataFieldsFetchSubPhase fieldDataFieldsFetchSubPhase,
                  InnerHitsFetchSubPhase innerHitsFetchSubPhase, TermVectorsFetchSubPhase termVectorsFetchSubPhase) {
    // Inner hits re-runs the fetch phase for nested hits, so it needs a back-reference.
    innerHitsFetchSubPhase.setFetchPhase(this);
    // NOTE(review): sub-phases presumably execute in array order — keep new
    // phases appended at the end; confirm against execute().
    this.fetchSubPhases = new FetchSubPhase[]{scriptFieldsPhase, matchedQueriesPhase, explainPhase, highlightPhase,
            fetchSourceSubPhase, versionPhase, fieldDataFieldsFetchSubPhase, innerHitsFetchSubPhase, termVectorsFetchSubPhase};
}

@Override
Expand Down
Expand Up @@ -19,9 +19,9 @@
package org.elasticsearch.search.fetch;

import com.google.common.collect.Maps;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.search.SearchHit;
Expand Down
Expand Up @@ -19,7 +19,6 @@

package org.elasticsearch.search.fetch.source;

import org.elasticsearch.Version;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
Expand Down