Skip to content

Commit

Permalink
Add support for conditional highlighting
Browse files Browse the repository at this point in the history
Adds a "conditional" object under each highlighting field that can contain
other fields.  Those fields are highlighted based on if there was a match
in the containing field.

It'll let you do things like this example in the docs:

This example extracts the first 100 characters from `text` if there is
a match in `title`, otherwise it highlights `text` as normal.

{
    "highlight": {
        "fields": {
            "title": {
                "conditional": {
                    "match": {
                        "text": {
                            "no_match_size": 100,
                            "skip_matching":  true
                        }
                    },
                    "no_match": {
                        "text": {
                            "no_match_size": 100
                        }
                    }
                }
            }
        }
    }
}

Also adds a setting to instruct the highlighter to skip its normal match
logic and just do its no_match_size stuff.   This is very useful with
conditional highlighting "chains" that end in a no_match_size extracting
a previous entry in the chain.  Like this example from the docs:
{
    "highlight": {
        "fields": {
            "text": {
                "conditional": {
                    "no_match": {
                        "auxiliary_text": {
                            "conditional": {
                                "no_match": {
                                    "file_text": {
                                        "conditional": {
                                           "no_match": {
                                               "text": {
                                                    "no_match_size": 100,
                                                    "skip_matching": true
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

Closes elastic#4649
  • Loading branch information
nik9000 committed Apr 25, 2014
1 parent 65bc017 commit 96997b3
Show file tree
Hide file tree
Showing 9 changed files with 572 additions and 187 deletions.
86 changes: 86 additions & 0 deletions docs/reference/search/request/highlighting.asciidoc
Expand Up @@ -547,3 +547,89 @@ keep in mind that scoring more phrases consumes more time and memory.

If using `matched_fields` keep in mind that `phrase_limit` phrases per
matched field are considered.

[[conditional]]
==== Conditional Highlighting
coming[1.2.0]

It is possible to vary the highlighting configuration based on whether or not
there were matches on a field using the `conditional` object and its child
objects, `match` and `no_match`. For example, the configuration below
highlights the `text` field first, the `auxiliary_text` field if there are
no matches there, the `file_text` field if there are no matches there, then
finally, if it didn't find anything, extracts some text from the beginning of
the `text` field:

[source,js]
--------------------------------------------------
{
"highlight": {
"fields": {
"text": {
"conditional": {
"no_match": {
"auxiliary_text": {
"conditional": {
"no_match": {
"file_text": {
"conditional": {
"no_match": {
"text": {
"no_match_size": 100,
"skip_matching": true
}
}
}
}
}
}
}
}
}
}
}
}
}
--------------------------------------------------
Note that adding `no_match_size` to the first `text` entry would short circuit
the whole thing because no match snippets count as matches for the purposes
of `conditional`.

This example extracts the first 100 characters from `text` if there is
a match in `title`, otherwise it highlights `text` as normal.
[source,js]
--------------------------------------------------
{
"highlight": {
"fields": {
"title": {
"conditional": {
"match": {
"text": {
"no_match_size": 100,
"skip_matching": true
}
},
"no_match": {
"text": {
"no_match_size": 100
}
}
}
}
}
}
}
--------------------------------------------------


[[skip-matching]]
==== Skip Matching
coming[1.2.0]

Setting `skip_matching` causes the highlighter to skip its normal match
finding behavior and just perform its `no_match_size` behavior. Thus setting
it without `no_match_size` doesn't make much sense.

The effect should be the same as setting the `highlight_query` to `match_all`
but more efficient.
Expand Up @@ -34,6 +34,7 @@
import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.Collections;
import java.util.Map;

Expand Down Expand Up @@ -76,21 +77,6 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);

try {
FieldQuery fieldQuery;
if (field.fieldOptions().requireFieldMatch()) {
if (cache.fieldMatchFieldQuery == null) {
// we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...)
cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query.originalQuery(), hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
fieldQuery = cache.fieldMatchFieldQuery;
} else {
if (cache.noFieldMatchFieldQuery == null) {
// we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...)
cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query.originalQuery(), hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
fieldQuery = cache.noFieldMatchFieldQuery;
}

MapperHighlightEntry entry = cache.mappers.get(mapper);
if (entry == null) {
FragListBuilder fragListBuilder;
Expand Down Expand Up @@ -140,6 +126,25 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
}
cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());

if (field.fieldOptions().skipMatching()) {
return extractNoMatchSnippetIfConfigured(highlighterContext, entry.fragmentsBuilder, encoder);
}

FieldQuery fieldQuery;
if (field.fieldOptions().requireFieldMatch()) {
if (cache.fieldMatchFieldQuery == null) {
// we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...)
cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query.originalQuery(), hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
fieldQuery = cache.fieldMatchFieldQuery;
} else {
if (cache.noFieldMatchFieldQuery == null) {
// we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...)
cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query.originalQuery(), hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
fieldQuery = cache.noFieldMatchFieldQuery;
}

String[] fragments;

// a HACK to make highlighter do highlighting, even though its using the single frag list builder
Expand All @@ -159,25 +164,29 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));
}

int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
if (noMatchSize > 0) {
// Essentially we just request that a fragment is built from 0 to noMatchSize using the normal fragmentsBuilder
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList());
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.names().indexName(),
fieldFragList, 1, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
if (fragments != null && fragments.length > 0) {
return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));
}
}

return null;

return extractNoMatchSnippetIfConfigured(highlighterContext, entry.fragmentsBuilder, encoder);
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
}

/**
 * Builds the "no match" snippet for the field being highlighted, if one was requested.
 *
 * When the field's {@code noMatchSize} option is greater than zero, a single fragment covering
 * offsets {@code 0} to {@code noMatchSize} is requested from the supplied fragments builder,
 * using the field's configured pre/post tags and the given encoder.
 *
 * @param highlighterContext context supplying the hit, mapper, field options and field name
 * @param fragmentsBuilder   builder used to create the fragment text
 * @param encoder            encoder passed through to the fragments builder
 * @return a {@link HighlightField} holding the snippet, or {@code null} when {@code noMatchSize}
 *         is not positive or the builder produced no fragments
 * @throws IOException if the fragments builder fails while creating the fragment
 */
private HighlightField extractNoMatchSnippetIfConfigured(HighlighterContext highlighterContext, FragmentsBuilder fragmentsBuilder,
        Encoder encoder) throws IOException {
    final int snippetLength = highlighterContext.field.fieldOptions().noMatchSize();
    if (snippetLength <= 0) {
        // No snippet size configured, so there is nothing to extract.
        return null;
    }

    // Ask the regular fragments builder for one fragment spanning [0, snippetLength) with no phrase info attached.
    FieldFragList leadingFragList = new SimpleFieldFragList(-1 /*ignored*/);
    leadingFragList.add(0, snippetLength, Collections.<WeightedPhraseInfo>emptyList());

    String[] snippets = fragmentsBuilder.createFragments(
            highlighterContext.hitContext.reader(),
            highlighterContext.hitContext.docId(),
            highlighterContext.mapper.names().indexName(),
            leadingFragList,
            1,
            highlighterContext.field.fieldOptions().preTags(),
            highlighterContext.field.fieldOptions().postTags(),
            encoder);

    if (snippets == null || snippets.length == 0) {
        return null;
    }
    return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(snippets));
}

private class MapperHighlightEntry {
public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder;
Expand Down
174 changes: 116 additions & 58 deletions src/main/java/org/elasticsearch/search/highlight/HighlightBuilder.java
Expand Up @@ -24,6 +24,7 @@
import org.elasticsearch.index.query.QueryBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -348,70 +349,90 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
}
if (fields != null) {
builder.startObject("fields");
for (Field field : fields) {
builder.startObject(field.name());
if (field.preTags != null) {
builder.field("pre_tags", field.preTags);
}
if (field.postTags != null) {
builder.field("post_tags", field.postTags);
}
if (field.fragmentSize != -1) {
builder.field("fragment_size", field.fragmentSize);
}
if (field.numOfFragments != -1) {
builder.field("number_of_fragments", field.numOfFragments);
}
if (field.fragmentOffset != -1) {
builder.field("fragment_offset", field.fragmentOffset);
}
if (field.highlightFilter != null) {
builder.field("highlight_filter", field.highlightFilter);
}
if (field.order != null) {
builder.field("order", field.order);
}
if (field.requireFieldMatch != null) {
builder.field("require_field_match", field.requireFieldMatch);
}
if (field.boundaryMaxScan != -1) {
builder.field("boundary_max_scan", field.boundaryMaxScan);
}
if (field.boundaryChars != null) {
builder.field("boundary_chars", field.boundaryChars);
}
if (field.highlighterType != null) {
builder.field("type", field.highlighterType);
}
if (field.fragmenter != null) {
builder.field("fragmenter", field.fragmenter);
}
if (field.highlightQuery != null) {
builder.field("highlight_query", field.highlightQuery);
}
if (field.noMatchSize != null) {
builder.field("no_match_size", field.noMatchSize);
}
if (field.matchedFields != null) {
builder.field("matched_fields", field.matchedFields);
}
if (field.phraseLimit != null) {
builder.field("phrase_limit", field.phraseLimit);
}
if (field.options != null && field.options.size() > 0) {
builder.field("options", field.options);
fieldsToXContent(builder, fields);
builder.endObject();
}

builder.endObject();
return builder;
}

private static void fieldsToXContent(XContentBuilder builder, Iterable<Field> fields) throws IOException {
for (Field field : fields) {
builder.startObject(field.name());
if (field.preTags != null) {
builder.field("pre_tags", field.preTags);
}
if (field.postTags != null) {
builder.field("post_tags", field.postTags);
}
if (field.fragmentSize != -1) {
builder.field("fragment_size", field.fragmentSize);
}
if (field.numOfFragments != -1) {
builder.field("number_of_fragments", field.numOfFragments);
}
if (field.fragmentOffset != -1) {
builder.field("fragment_offset", field.fragmentOffset);
}
if (field.highlightFilter != null) {
builder.field("highlight_filter", field.highlightFilter);
}
if (field.order != null) {
builder.field("order", field.order);
}
if (field.requireFieldMatch != null) {
builder.field("require_field_match", field.requireFieldMatch);
}
if (field.boundaryMaxScan != -1) {
builder.field("boundary_max_scan", field.boundaryMaxScan);
}
if (field.boundaryChars != null) {
builder.field("boundary_chars", field.boundaryChars);
}
if (field.highlighterType != null) {
builder.field("type", field.highlighterType);
}
if (field.fragmenter != null) {
builder.field("fragmenter", field.fragmenter);
}
if (field.highlightQuery != null) {
builder.field("highlight_query", field.highlightQuery);
}
if (field.noMatchSize != null) {
builder.field("no_match_size", field.noMatchSize);
}
if (field.matchedFields != null) {
builder.field("matched_fields", field.matchedFields);
}
if (field.phraseLimit != null) {
builder.field("phrase_limit", field.phraseLimit);
}
if (field.options != null && field.options.size() > 0) {
builder.field("options", field.options);
}
if (field.forceSource != null) {
builder.field("force_source", field.forceSource);
}
if (field.matchConditionalFields != null || field.noMatchConditionalFields != null) {
builder.startObject("conditional");
if (field.matchConditionalFields != null) {
builder.startObject("match");
fieldsToXContent(builder, field.matchConditionalFields);
builder.endObject();
}
if (field.forceSource != null) {
builder.field("force_source", field.forceSource);
if (field.noMatchConditionalFields != null) {
builder.startObject("no_match");
fieldsToXContent(builder, field.noMatchConditionalFields);
builder.endObject();
}

builder.endObject();
}
if (field.skipMatching != null) {
builder.field("skip_matching", field.skipMatching);
}
builder.endObject();
}

builder.endObject();
return builder;
}

public static class Field {
Expand All @@ -434,6 +455,9 @@ public static class Field {
Integer phraseLimit;
Map<String, Object> options;
Boolean forceSource;
List<Field> matchConditionalFields;
List<Field> noMatchConditionalFields;
Boolean skipMatching;

public Field(String name) {
this.name = name;
Expand Down Expand Up @@ -584,5 +608,39 @@ public Field forceSource(boolean forceSource) {
return this;
}

/**
 * Registers a field to be highlighted only when this field produces no matches.
 * The backing list is created on first use.
 *
 * @param field field to highlight when this field has no matches
 * @return this for chaining
 */
public Field addNoMatchConditionalField(Field field) {
    List<Field> conditionals = this.noMatchConditionalFields;
    if (conditionals == null) {
        conditionals = new ArrayList<Field>();
        this.noMatchConditionalFields = conditionals;
    }
    conditionals.add(field);
    return this;
}

/**
 * Registers a field to be highlighted only when this field produces matches.
 * The backing list is created on first use.
 *
 * @param field field to highlight when this field has matches
 * @return this for chaining
 */
public Field addMatchConditionalField(Field field) {
    List<Field> conditionals = this.matchConditionalFields;
    if (conditionals == null) {
        conditionals = new ArrayList<Field>();
        this.matchConditionalFields = conditionals;
    }
    conditionals.add(field);
    return this;
}

/**
 * Sets whether the highlighter should skip its normal highlighting and just perform its
 * noMatchSize action.
 *
 * @param skipMatching value to store for the {@code skip_matching} option
 * @return this for chaining
 */
public Field skipMatching(boolean skipMatching) {
    // Explicit form of the boxing the compiler would otherwise insert for the Boolean field.
    this.skipMatching = Boolean.valueOf(skipMatching);
    return this;
}
}
}

0 comments on commit 96997b3

Please sign in to comment.