Expose fragmenter option for plain / normal highlighter.
Closes #2465
martijnvg committed Dec 6, 2012
1 parent c2f8ee1 commit f72d5c1
Showing 5 changed files with 109 additions and 6 deletions.
@@ -50,6 +50,7 @@ public class HighlightBuilder implements ToXContent {

private String highlighterType;

private String fragmenter;

/**
* Adds a field to be highlighted with default fragment size of 100 characters, and
@@ -188,6 +189,15 @@ public HighlightBuilder highlighterType(String highlighterType) {
return this;
}

/**
* Sets what fragmenter to use to break up text that is eligible for highlighting.
* This option is only applicable when using plain / normal highlighter.
*/
public HighlightBuilder fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
return this;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject("highlight");
@@ -212,6 +222,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (highlighterType != null) {
builder.field("type", highlighterType);
}
if (fragmenter != null) {
builder.field("fragmenter", fragmenter);
}
if (fields != null) {
builder.startObject("fields");
for (Field field : fields) {
@@ -231,6 +244,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (field.highlighterType != null) {
builder.field("type", field.highlighterType);
}
if (field.fragmenter != null) {
builder.field("fragmenter", field.fragmenter);
}

builder.endObject();
}
@@ -248,6 +264,7 @@ public static class Field {
int numOfFragments = -1;
Boolean requireFieldMatch;
String highlighterType;
String fragmenter;

public Field(String name) {
this.name = name;
@@ -281,5 +298,10 @@ public Field highlighterType(String highlighterType) {
this.highlighterType = highlighterType;
return this;
}

public Field fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
return this;
}
}
}
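For illustration, a minimal sketch of how the new builder option can be used from the Java client API. It mirrors the usage in the test added below; the index name, field name, and query text are only placeholders:

    // Hedged sketch: request plain highlighting with an explicit fragmenter.
    // "simple" selects fixed-size fragments; "span" keeps the default span-aware behaviour.
    SearchResponse response = client.prepareSearch("test")
            .setQuery(QueryBuilders.matchQuery("tags", "long tag"))
            .addHighlightedField(new HighlightBuilder.Field("tags")
                    .fragmentSize(100)
                    .numOfFragments(2)
                    .fragmenter("simple"))
            .execute().actionGet();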
@@ -30,6 +30,7 @@
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.vectorhighlight.*;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FastStringReader;
@@ -131,13 +132,13 @@ public void hitExecute(SearchContext context, HitContext hitContext) throws Elas
useFastVectorHighlighter = mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions();
} else if (field.highlighterType().equals("fast-vector-highlighter") || field.highlighterType().equals("fvh")) {
if (!(mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions())) {
throw new FetchPhaseExecutionException(context, "the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
throw new ElasticSearchIllegalArgumentException("the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
}
useFastVectorHighlighter = true;
} else if (field.highlighterType().equals("highlighter") || field.highlighterType().equals("plain")) {
useFastVectorHighlighter = false;
} else {
throw new FetchPhaseExecutionException(context, "unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]");
throw new ElasticSearchIllegalArgumentException("unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]");
}
if (!useFastVectorHighlighter) {
MapperHighlightEntry entry = cache.mappers.get(mapper);
@@ -151,8 +152,14 @@ public void hitExecute(SearchContext context, HitContext hitContext) throws Elas
Fragmenter fragmenter;
if (field.numberOfFragments() == 0) {
fragmenter = new NullFragmenter();
} else {
} else if (field.fragmenter() == null) {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
} else if ("simple".equals(field.fragmenter())) {
fragmenter = new SimpleFragmenter(field.fragmentCharSize());
} else if ("span".equals(field.fragmenter())) {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
} else {
throw new ElasticSearchIllegalArgumentException("unknown fragmenter option [" + field.fragmenter() + "] for the field [" + field.field() + "]");
}
Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]);

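For readers less familiar with Lucene's highlight module, a rough sketch (not the commit's code verbatim) of where the chosen fragmenter ends up; queryScorer and field are assumed to come from the surrounding hitExecute code. "simple" yields fixed-size fragments, while "span" (and the unset default) tries to keep the matched spans inside a single fragment:

    // Sketch only: map the option onto Lucene's fragmenters and wire them into the plain Highlighter.
    Fragmenter fragmenter;
    if ("simple".equals(field.fragmenter())) {
        fragmenter = new SimpleFragmenter(field.fragmentCharSize());
    } else {
        // "span" or unset
        fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
    }
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<em>", "</em>"), queryScorer);
    highlighter.setTextFragmenter(fragmenter);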
@@ -77,6 +77,7 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN;
char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS;
String globalHighlighterType = null;
String globalFragmenter = null;

while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
@@ -120,6 +121,8 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
globalBoundaryChars = parser.text().toCharArray();
} else if ("type".equals(topLevelFieldName)) {
globalHighlighterType = parser.text();
} else if ("fragmenter".equals(topLevelFieldName)) {
globalFragmenter = parser.text();
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("fields".equals(topLevelFieldName)) {
@@ -166,6 +169,8 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
field.boundaryChars(parser.text().toCharArray());
} else if ("type".equals(fieldName)) {
field.highlighterType(parser.text());
} else if ("fragmenter".equals(fieldName)) {
field.fragmenter(parser.text());
}
}
}
@@ -214,6 +219,9 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
if (field.highlighterType() == null) {
field.highlighterType(globalHighlighterType);
}
if (field.fragmenter() == null) {
field.fragmenter(globalFragmenter);
}
}

context.highlight(new SearchContextHighlight(fields));
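The parse element above accepts "fragmenter" both at the top level of the "highlight" object and per field, with the per-field value taking precedence. A hedged sketch of such a request body, built with XContentBuilder (the field name is illustrative):

    // Sketch of the highlight section this parse element consumes.
    XContentBuilder source = XContentFactory.jsonBuilder()
            .startObject()
                .startObject("highlight")
                    .field("fragmenter", "span")            // global default for all highlighted fields
                    .startObject("fields")
                        .startObject("tags")
                            .field("fragmenter", "simple")  // per-field override
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();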
@@ -60,6 +60,8 @@ public static class Field {

private String highlighterType;

private String fragmenter;

private int boundaryMaxScan = -1;
private char[] boundaryChars = null;

@@ -151,6 +153,14 @@ public void highlighterType(String type) {
this.highlighterType = type;
}

public String fragmenter() {
return fragmenter;
}

public void fragmenter(String fragmenter) {
this.fragmenter = fragmenter;
}

public int boundaryMaxScan() {
return boundaryMaxScan;
}
@@ -20,14 +20,17 @@
package org.elasticsearch.test.integration.search.highlight;

import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder;
@@ -49,6 +52,7 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.testng.Assert.fail;

/**
*
@@ -915,8 +919,60 @@ public void testFSHHighlightAllMvFragments() throws Exception {
.addHighlightedField("tags", -1, 0)
.execute().actionGet();

assertThat(2, equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments().length));
assertThat("this is a really long <em>tag</em> i would like to highlight", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string()));
assertThat("here is another one that is very long and has the <em>tag</em> token near the end", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string()));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really long <em>tag</em> i would like to highlight"));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long and has the <em>tag</em> token near the end"));
}

@Test
public void testPlainHighlightDifferentFragmenter() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();
} catch (Exception e) {
// ignore
}

client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder()
.put("number_of_shards", 1).put("number_of_replicas", 0))
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("tags").field("type", "string").endObject()
.endObject().endObject().endObject())
.execute().actionGet();

client.prepareIndex("test", "type1", "1")
.setSource(jsonBuilder().startObject().field("tags",
"this is a really long tag i would like to highlight",
"here is another one that is very long tag and has the tag token near the end").endObject())
.setRefresh(true).execute().actionGet();

SearchResponse response = client.prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
.addHighlightedField(new HighlightBuilder.Field("tags")
.fragmentSize(-1).numOfFragments(2).fragmenter("simple"))
.execute().actionGet();
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));

response = client.prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
.addHighlightedField(new HighlightBuilder.Field("tags")
.fragmentSize(-1).numOfFragments(2).fragmenter("span"))
.execute().actionGet();
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));

try {
client.prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
.addHighlightedField(new HighlightBuilder.Field("tags")
.fragmentSize(-1).numOfFragments(2).fragmenter("invalid"))
.execute().actionGet();
fail("Shouldn't get here");
} catch (SearchPhaseExecutionException e) {
assertThat(e.shardFailures()[0].status(), equalTo(RestStatus.BAD_REQUEST));
}
}

}
