Skip to content

Commit

Permalink
Added support for highlighting multi term queries using the postings …
Browse files Browse the repository at this point in the history
…highlighter

Closes elastic#4042
  • Loading branch information
javanna committed Nov 4, 2013
1 parent c141020 commit 002acdc
Show file tree
Hide file tree
Showing 2 changed files with 180 additions and 16 deletions.
Expand Up @@ -21,10 +21,11 @@
import com.google.common.collect.Maps;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.postingshighlight.CustomPassageFormatter;
import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter;
Expand Down Expand Up @@ -67,9 +68,10 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;

if (!hitContext.cache().containsKey(CACHE_KEY)) {
//get the non rewritten query and rewrite it
Query query;
try {
query = rewrite(context.query());
query = rewrite(context, hitContext.topLevelReader());
} catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
Expand Down Expand Up @@ -107,7 +109,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
//we highlight every value separately calling the highlight method multiple times, only if we need to have back a snippet per value (whole value)
int values = mergeValues ? 1 : textsToHighlight.size();
for (int i = 0; i < values; i++) {
Snippet[] fieldSnippets = highlighter.highlightDoc(highlighterContext.fieldName, mapperHighlighterEntry.filteredQueryTerms, new IndexSearcher(hitContext.reader()), hitContext.docId(), numberOfFragments);
Snippet[] fieldSnippets = highlighter.highlightDoc(highlighterContext.fieldName, mapperHighlighterEntry.filteredQueryTerms, context.searcher(), hitContext.docId(), numberOfFragments);
if (fieldSnippets != null) {
for (Snippet fieldSnippet : fieldSnippets) {
if (Strings.hasText(fieldSnippet.getText())) {
Expand Down Expand Up @@ -144,17 +146,49 @@ public int compare(Snippet o1, Snippet o2) {
return null;
}

private static final IndexReader EMPTY_INDEXREADER = new MultiReader();
private static Query rewrite(SearchContext searchContext, IndexReader reader) throws IOException {
//rewrite is expensive: if the query was already rewritten we try not to rewrite
boolean mustRewrite = !searchContext.queryRewritten();

Query original = searchContext.parsedQuery().query();

MultiTermQuery originalMultiTermQuery = null;
MultiTermQuery.RewriteMethod originalRewriteMethod = null;
if (original instanceof MultiTermQuery) {
originalMultiTermQuery = (MultiTermQuery) original;
if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
//we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method
mustRewrite = true;
}
}

if (!mustRewrite) {
//return the rewritten query
return searchContext.query();
}

private static Query rewrite(Query original) throws IOException {
Query query = original;
for (Query rewrittenQuery = query.rewrite(EMPTY_INDEXREADER); rewrittenQuery != query;
rewrittenQuery = query.rewrite(EMPTY_INDEXREADER)) {
for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query;
rewrittenQuery = query.rewrite(reader)) {
query = rewrittenQuery;
}

if (originalMultiTermQuery != null) {
if (originalRewriteMethod != null) {
//set back the original rewrite method after the rewrite is done
originalMultiTermQuery.setRewriteMethod(originalRewriteMethod);
}
}

return query;
}

/**
 * Tells whether the given rewrite method is one of the two kinds accepted here for term extraction.
 *
 * @param rewriteMethod the rewrite method to check
 * @return {@code true} if it is a {@link TopTermsRewrite} or a {@link ScoringRewrite}, {@code false} otherwise
 */
private static boolean allowsForTermExtraction(MultiTermQuery.RewriteMethod rewriteMethod) {
    if (rewriteMethod instanceof TopTermsRewrite) {
        return true;
    }
    return rewriteMethod instanceof ScoringRewrite;
}

private static SortedSet<Term> extractTerms(Query query) {
SortedSet<Term> queryTerms = new TreeSet<Term>();
query.extractTerms(queryTerms);
Expand Down
Expand Up @@ -2019,7 +2019,7 @@ public void testPostingsHighlighter() throws Exception {
ensureGreen();

client().prepareIndex("test", "type1")
.setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").setRefresh(true).get();
.setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy quick dog").setRefresh(true).get();

logger.info("--> highlighting and searching on field1");
SearchSourceBuilder source = searchSource()
Expand Down Expand Up @@ -2049,28 +2049,28 @@ public void testPostingsHighlighter() throws Exception {
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHitCount(searchResponse, 1l);

assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy <xxx>quick</xxx> dog"));

logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(prefixQuery("_all", "qui"))
.query(matchPhraseQuery("_all", "quick brown"))
.highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>"));

searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHitCount(searchResponse, 1l);
//no snippets produced for prefix query, not supported by postings highlighter
assertThat(searchResponse.getHits().getAt(0).highlightFields().size(), equalTo(0));
//phrase query results in highlighting all different terms regardless of their positions
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy <xxx>quick</xxx> dog"));

//lets fall back to the standard highlighter then, what people would do with unsupported queries
//lets fall back to the standard highlighter then, what people would do to highlight query matches
logger.info("--> searching on _all, highlighting on field2, falling back to the plain highlighter");
source = searchSource()
.query(prefixQuery("_all", "qui"))
.query(matchPhraseQuery("_all", "quick brown"))
.highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>").highlighterType("highlighter"));

searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHitCount(searchResponse, 1l);

assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy quick dog"));
}

@Test
Expand Down Expand Up @@ -2486,6 +2486,136 @@ public XContentBuilder type1PostingsffsetsMapping() throws IOException {
.endObject().endObject();
}

// Rewrite method names that the prefix, regexp, wildcard and query string highlighting tests
// iterate over, passing each one to the query builder's rewrite(...) setter.
private static final String[] REWRITE_METHODS = new String[]{"constant_score_auto", "scoring_boolean", "constant_score_boolean",
"constant_score_filter", "top_terms_boost_50", "top_terms_50"};

/**
 * Indexes a single document into an index created with the {@code type1PostingsffsetsMapping()} mapping and,
 * for every entry of {@code REWRITE_METHODS}, runs a prefix query for "qui" on field2 and checks the first
 * highlighted fragment returned for field2.
 */
@Test
public void testPostingsHighlighterPrefixQuery() throws Exception {
    assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
    ensureGreen();

    client().prepareIndex("test", "type1")
            .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.")
            .get();
    refresh();
    logger.info("--> highlighting and searching on field2");

    final String expectedFragment = "The <em>quick</em> brown fox jumps over the lazy dog!";
    for (int i = 0; i < REWRITE_METHODS.length; i++) {
        SearchSourceBuilder sourceBuilder = searchSource()
                .query(prefixQuery("field2", "qui").rewrite(REWRITE_METHODS[i]))
                .highlight(highlight().field("field2"));
        //the search type is picked at random for every request
        SearchResponse response = client().search(searchRequest("test").source(sourceBuilder)
                .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
        assertHitCount(response, 1L);

        String fragment = response.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string();
        assertThat(fragment, equalTo(expectedFragment));
    }
}

/**
 * Indexes a single document into an index created with the {@code type1PostingsffsetsMapping()} mapping,
 * runs a fuzzy query for the misspelled term "quck" on field2 and checks the first highlighted fragment
 * returned for field2.
 */
@Test
public void testPostingsHighlighterFuzzyQuery() throws Exception {
    assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
    ensureGreen();

    client().prepareIndex("test", "type1")
            .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.")
            .get();
    refresh();
    logger.info("--> highlighting and searching on field2");

    SearchSourceBuilder sourceBuilder = searchSource()
            .query(fuzzyQuery("field2", "quck"))
            .highlight(highlight().field("field2"));
    //the search type is picked at random
    SearchResponse response = client().search(searchRequest("test").source(sourceBuilder)
            .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
    assertHitCount(response, 1L);

    String fragment = response.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string();
    assertThat(fragment, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
}

/**
 * Indexes a single document into an index created with the {@code type1PostingsffsetsMapping()} mapping and,
 * for every entry of {@code REWRITE_METHODS}, runs the regexp query "qu[a-l]+k" on field2 and checks the
 * first highlighted fragment returned for field2.
 */
@Test
public void testPostingsHighlighterRegexpQuery() throws Exception {
    assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
    ensureGreen();

    client().prepareIndex("test", "type1")
            .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.")
            .get();
    refresh();
    logger.info("--> highlighting and searching on field2");

    final String expectedFragment = "The <em>quick</em> brown fox jumps over the lazy dog!";
    for (int i = 0; i < REWRITE_METHODS.length; i++) {
        SearchSourceBuilder sourceBuilder = searchSource()
                .query(regexpQuery("field2", "qu[a-l]+k").rewrite(REWRITE_METHODS[i]))
                .highlight(highlight().field("field2"));
        //the search type is picked at random for every request
        SearchResponse response = client().search(searchRequest("test").source(sourceBuilder)
                .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
        assertHitCount(response, 1L);

        String fragment = response.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string();
        assertThat(fragment, equalTo(expectedFragment));
    }
}

/**
 * Indexes a single document into an index created with the {@code type1PostingsffsetsMapping()} mapping and,
 * for every entry of {@code REWRITE_METHODS}, runs the wildcard queries "qui*" and "qu*k" (in that order) on
 * field2, checking the first highlighted fragment returned for field2 after each search.
 */
@Test
public void testPostingsHighlighterWildcardQuery() throws Exception {
    assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
    ensureGreen();

    client().prepareIndex("test", "type1")
            .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.")
            .get();
    refresh();
    logger.info("--> highlighting and searching on field2");

    final String expectedFragment = "The <em>quick</em> brown fox jumps over the lazy dog!";
    //trailing wildcard first, then a wildcard in the middle of the term
    final String[] wildcardPatterns = new String[]{"qui*", "qu*k"};
    for (String rewriteMethod : REWRITE_METHODS) {
        for (String wildcardPattern : wildcardPatterns) {
            SearchSourceBuilder sourceBuilder = searchSource()
                    .query(wildcardQuery("field2", wildcardPattern).rewrite(rewriteMethod))
                    .highlight(highlight().field("field2"));
            //the search type is picked at random for every request
            SearchResponse response = client().search(searchRequest("test").source(sourceBuilder)
                    .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
            assertHitCount(response, 1L);

            String fragment = response.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string();
            assertThat(fragment, equalTo(expectedFragment));
        }
    }
}

/**
 * Indexes a single document whose field2 value is "aaab" into an index created with the
 * {@code type1PostingsffsetsMapping()} mapping, runs a range query from "aaaa" (inclusive) to "zzzz"
 * (exclusive) on field2 and checks the first highlighted fragment returned for field2.
 */
@Test
public void testPostingsHighlighterTermRangeQuery() throws Exception {
    assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
    ensureGreen();

    client().prepareIndex("test", "type1")
            .setSource("field1", "this is a test", "field2", "aaab")
            .get();
    refresh();
    logger.info("--> highlighting and searching on field2");

    SearchSourceBuilder sourceBuilder = searchSource()
            .query(rangeQuery("field2").gte("aaaa").lt("zzzz"))
            .highlight(highlight().field("field2"));
    //the search type is picked at random
    SearchResponse response = client().search(searchRequest("test").source(sourceBuilder)
            .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
    assertHitCount(response, 1L);

    String fragment = response.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string();
    assertThat(fragment, equalTo("<em>aaab</em>"));
}

/**
 * Indexes a single document into an index created with the {@code type1PostingsffsetsMapping()} mapping and,
 * for every entry of {@code REWRITE_METHODS}, runs the query string "qui*" with field2 as default field and
 * checks the first highlighted fragment returned for field2.
 */
@Test
public void testPostingsHighlighterQueryString() throws Exception {
    assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
    ensureGreen();

    client().prepareIndex("test", "type1")
            .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.")
            .get();
    refresh();
    logger.info("--> highlighting and searching on field2");

    final String expectedFragment = "The <em>quick</em> brown fox jumps over the lazy dog!";
    for (int i = 0; i < REWRITE_METHODS.length; i++) {
        SearchSourceBuilder sourceBuilder = searchSource()
                .query(queryString("qui*").defaultField("field2").rewrite(REWRITE_METHODS[i]))
                .highlight(highlight().field("field2"));
        //the search type is picked at random for every request
        SearchResponse response = client().search(searchRequest("test").source(sourceBuilder)
                .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
        assertHitCount(response, 1L);

        String fragment = response.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string();
        assertThat(fragment, equalTo(expectedFragment));
    }
}

@Test
@Slow
public void testPostingsHighlighterManyDocs() throws Exception {
Expand Down

0 comments on commit 002acdc

Please sign in to comment.