Skip to content

Commit

Permalink
Fixed multi term queries support in postings highlighter for non top-…
Browse files Browse the repository at this point in the history
…level queries

In #4052 we added support for highlighting multi term queries using the postings highlighter. That worked only for top-level queries though, and not for multi term queries that are nested for instance within a bool query, or filtered query, or a constant score query.

The way we make this work is by walking the query structure and temporarily overriding the query rewrite method with a method that allows for multi terms extraction.

Closes #5127
  • Loading branch information
javanna committed Feb 21, 2014
1 parent 2a1db2d commit a4071cb
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 23 deletions.
8 changes: 8 additions & 0 deletions docs/reference/search/request/highlighting.asciidoc
Expand Up @@ -57,12 +57,20 @@ highlighting using the postings highlighter on it:
}
--------------------------------------------------

[NOTE]
Note that the postings highlighter is meant to perform simple query terms
highlighting, regardless of their positions. That means that when used for
instance in combination with a phrase query, it will highlight all the terms
that the query is composed of, regardless of whether they are actually part of
a query match, effectively ignoring their positions.

[WARNING]
The postings highlighter does support highlighting of multi term queries, like
prefix queries, wildcard queries and so on. On the other hand, this requires
the queries to be rewritten using a proper
<<query-dsl-multi-term-rewrite,rewrite method>> that supports multi term
extraction, which is a potentially expensive operation.


==== Fast vector highlighter

Expand Down
Expand Up @@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.highlight;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
Expand All @@ -33,6 +34,8 @@
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.lucene.search.XFilteredQuery;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
Expand Down Expand Up @@ -144,25 +147,17 @@ public int compare(Snippet o1, Snippet o2) {
}

private static Query rewrite(HighlighterContext highlighterContext, IndexReader reader) throws IOException {
//rewrite is expensive: if the query was already rewritten we try not to rewrite
boolean mustRewrite = !highlighterContext.query.queryRewritten();

Query original = highlighterContext.query.originalQuery();

MultiTermQuery originalMultiTermQuery = null;
MultiTermQuery.RewriteMethod originalRewriteMethod = null;
if (original instanceof MultiTermQuery) {
originalMultiTermQuery = (MultiTermQuery) original;
if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
//we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method
mustRewrite = true;
}
}
//we walk the query tree and when we encounter multi term queries we need to make sure the rewrite method
//supports multi term extraction. If not we temporarily override it (and restore it after the rewrite).
List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries = Lists.newArrayList();
overrideMultiTermRewriteMethod(original, modifiedMultiTermQueries);

if (!mustRewrite) {
//return the rewritten query
//rewrite is expensive: if the query was already rewritten we try not to rewrite it again
if (highlighterContext.query.queryRewritten() && modifiedMultiTermQueries.size() == 0) {
//return the already rewritten query
return highlighterContext.query.query();
}

Expand All @@ -172,16 +167,46 @@ private static Query rewrite(HighlighterContext highlighterContext, IndexReader
query = rewrittenQuery;
}

if (originalMultiTermQuery != null) {
if (originalRewriteMethod != null) {
//set back the original rewrite method after the rewrite is done
originalMultiTermQuery.setRewriteMethod(originalRewriteMethod);
}
//set back the original rewrite method after the rewrite is done
for (Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod> modifiedMultiTermQuery : modifiedMultiTermQueries) {
modifiedMultiTermQuery.v1().setRewriteMethod(modifiedMultiTermQuery.v2());
}

return query;
}

private static void overrideMultiTermRewriteMethod(Query query, List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries) {

if (query instanceof MultiTermQuery) {
MultiTermQuery originalMultiTermQuery = (MultiTermQuery) query;
if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
MultiTermQuery.RewriteMethod originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
//we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method
modifiedMultiTermQueries.add(Tuple.tuple(originalMultiTermQuery, originalRewriteMethod));
}
}

if (query instanceof BooleanQuery) {
BooleanQuery booleanQuery = (BooleanQuery) query;
for (BooleanClause booleanClause : booleanQuery) {
overrideMultiTermRewriteMethod(booleanClause.getQuery(), modifiedMultiTermQueries);
}
}

if (query instanceof XFilteredQuery) {
overrideMultiTermRewriteMethod(((XFilteredQuery) query).getQuery(), modifiedMultiTermQueries);
}

if (query instanceof FilteredQuery) {
overrideMultiTermRewriteMethod(((FilteredQuery) query).getQuery(), modifiedMultiTermQueries);
}

if (query instanceof ConstantScoreQuery) {
overrideMultiTermRewriteMethod(((ConstantScoreQuery) query).getQuery(), modifiedMultiTermQueries);
}
}

private static boolean allowsForTermExtraction(MultiTermQuery.RewriteMethod rewriteMethod) {
return rewriteMethod instanceof TopTermsRewrite || rewriteMethod instanceof ScoringRewrite;
}
Expand Down
Expand Up @@ -48,6 +48,7 @@
import static org.elasticsearch.action.search.SearchType.QUERY_THEN_FETCH;
import static org.elasticsearch.client.Requests.searchRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.*;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
Expand Down Expand Up @@ -1393,23 +1394,23 @@ public void testPhrasePrefix() throws ElasticsearchException, IOException {
.setSource("field4", "a quick fast blue car").get();
refresh();

source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")).from(0).size(60).explain(true)
source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")).from(0).size(60).explain(true)
.highlight(highlight().field("field3").order("score").preTags("<x>").postTags("</x>"));

searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet();

assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));

logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")).from(0).size(60).explain(true)
source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")).from(0).size(60).explain(true)
.highlight(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet();

assertHighlight(searchResponse, 0, "field4", 0, 1, equalTo("<x>The quick browse</x> button is a fancy thing, right bro?"));
assertHighlight(searchResponse, 1, "field4", 0, 1, equalTo("<x>The quick brown</x> fox jumps over the lazy dog"));

logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")).from(0).size(60).explain(true)
source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")).from(0).size(60).explain(true)
.highlight(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet();

Expand Down Expand Up @@ -2419,6 +2420,85 @@ public void testPostingsHighlighterQueryString() throws Exception {
}
}

@Test
public void testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws Exception {

assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
ensureGreen();

client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get();
refresh();

logger.info("--> highlighting and searching on field1");
for (String rewriteMethod : REWRITE_METHODS) {
SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+").rewrite(rewriteMethod)))
.highlight(highlight().field("field1"));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
}
}

@Test
public void testPostingsHighlighterMultiTermQueryMultipleLevels() throws Exception {

assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
ensureGreen();

client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get();
refresh();

logger.info("--> highlighting and searching on field1");
for (String rewriteMethod : REWRITE_METHODS) {
SearchSourceBuilder source = searchSource().query(boolQuery()
.should(constantScoreQuery(FilterBuilders.missingFilter("field1")))
.should(matchQuery("field1", "test"))
.should(filteredQuery(queryString("field1:photo*").rewrite(rewriteMethod), null)))
.highlight(highlight().field("field1"));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
}
}

@Test
public void testPostingsHighlighterPrefixQueryWithinBooleanQuery() throws Exception {

assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
ensureGreen();

client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get();
refresh();

logger.info("--> highlighting and searching on field1");
for (String rewriteMethod : REWRITE_METHODS) {
SearchSourceBuilder source = searchSource().query(boolQuery().must(prefixQuery("field1", "photo").rewrite(rewriteMethod)).should(matchQuery("field1", "test").minimumShouldMatch("0")))
.highlight(highlight().field("field1"));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
}
}

@Test
public void testPostingsHighlighterQueryStringWithinFilteredQuery() throws Exception {

assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
ensureGreen();

client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get();
refresh();

logger.info("--> highlighting and searching on field1");
for (String rewriteMethod : REWRITE_METHODS) {
SearchSourceBuilder source = searchSource().query(filteredQuery(queryString("field1:photo*").rewrite(rewriteMethod), missingFilter("field_null")))
.highlight(highlight().field("field1"));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
}
}

@Test
@Slow
public void testPostingsHighlighterManyDocs() throws Exception {
Expand Down

0 comments on commit a4071cb

Please sign in to comment.