Skip to content

Commit

Permalink
Highlighting: Automatically use the field values extracted from _sour…
Browse files Browse the repository at this point in the history
…ce if not stored explicitly in the mapping, closes elastic#561.
  • Loading branch information
kimchy committed Dec 12, 2010
1 parent fe3f5d4 commit 216b2ab
Show file tree
Hide file tree
Showing 5 changed files with 307 additions and 9 deletions.
Expand Up @@ -27,9 +27,12 @@
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.search.SearchException;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.SearchHitPhase;
import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragmentsBuilder;
import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.SearchContext;

Expand Down Expand Up @@ -57,20 +60,19 @@ public class HighlightPhase implements SearchHitPhase {

Map<String, HighlightField> highlightFields = newHashMap();
for (SearchContextHighlight.Field field : context.highlight().fields()) {
String fieldName = field.field();
FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(field.field());
if (mapper != null) {
fieldName = mapper.names().indexName();
if (mapper == null) {
throw new SearchException(context.shardTarget(), "No mapping found for [" + field.field() + "]");
}

FastVectorHighlighter highlighter = buildHighlighter(field);
FastVectorHighlighter highlighter = buildHighlighter(context, mapper, field);
FieldQuery fieldQuery = buildFieldQuery(highlighter, context.query(), reader, field);

String[] fragments;
try {
// a HACK to make highlighter do highlighting, even though its using the single frag list builder
int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, fieldName, field.fragmentCharSize(), numberOfFragments);
fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments);
} catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
}
Expand All @@ -91,18 +93,30 @@ private FieldQuery buildFieldQuery(FastVectorHighlighter highlighter, Query quer
return new CustomFieldQuery(query, highlighter);
}

private FastVectorHighlighter buildHighlighter(SearchContextHighlight.Field field) {
private FastVectorHighlighter buildHighlighter(SearchContext searchContext, FieldMapper fieldMapper, SearchContextHighlight.Field field) {
FragListBuilder fragListBuilder;
FragmentsBuilder fragmentsBuilder;
if (field.numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder();
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
if (fieldMapper.stored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
}
} else {
fragListBuilder = new SimpleFragListBuilder();
if (field.scoreOrdered()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
if (fieldMapper.stored()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags());
} else {
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
}
} else {
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
if (fieldMapper.stored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags());
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldMapper, searchContext, field.preTags(), field.postTags());
}
}
}

Expand Down
@@ -0,0 +1,61 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.highlight.vectorhighlight;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.List;

/**
 * A {@link ScoreOrderFragmentsBuilder} whose fields to highlight are built from values
 * obtained through the search context's source lookup, using the mapper's full name as
 * the lookup path and its index name as the name of the produced fields.
 *
 * @author kimchy (shay.banon)
 */
public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {

    private final FieldMapper mapper;

    private final SearchContext searchContext;

    public SourceScoreOrderFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
                                            String[] preTags, String[] postTags) {
        super(preTags, postTags);
        this.mapper = mapper;
        this.searchContext = searchContext;
    }

    /**
     * Builds one analyzed, non-stored field per value found in the source for the mapped field.
     */
    @Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
        // the highlighter is always invoked with the low level reader and its matching doc id,
        // so both can be handed to the lookup as is
        SearchLookup sourceAccess = searchContext.lookup();
        sourceAccess.setNextReader(reader);
        sourceAccess.setNextDocId(docId);

        List<Object> sourceValues = sourceAccess.source().getValues(mapper.names().fullName());
        Field[] result = new Field[sourceValues.size()];
        int slot = 0;
        for (Object sourceValue : sourceValues) {
            result[slot++] = new Field(mapper.names().indexName(), sourceValue.toString(), Field.Store.NO, Field.Index.ANALYZED);
        }
        return result;
    }
}
@@ -0,0 +1,67 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.highlight.vectorhighlight;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.List;

/**
 * A {@link SimpleFragmentsBuilder} whose fields to highlight are built from values
 * obtained through the search context's source lookup, using the mapper's full name as
 * the lookup path and its index name as the name of the produced fields.
 *
 * @author kimchy (shay.banon)
 */
public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {

    /**
     * Shared result returned when the source holds no value for the field. Declared
     * <tt>final</tt> so the shared reference cannot be reassigned by other code.
     */
    public static final Field[] EMPTY_FIELDS = new Field[0];

    private final FieldMapper mapper;

    private final SearchContext searchContext;

    public SourceSimpleFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
                                        String[] preTags, String[] postTags) {
        super(preTags, postTags);
        this.mapper = mapper;
        this.searchContext = searchContext;
    }

    /**
     * Builds one analyzed, non-stored field per value found in the source for the mapped field.
     *
     * @return the fields built from the source values, or {@link #EMPTY_FIELDS} when there are none
     */
    @Override protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
        // the highlighter is always invoked with the low level reader and its matching doc id,
        // so both can be handed to the lookup as is
        SearchLookup lookup = searchContext.lookup();
        lookup.setNextReader(reader);
        lookup.setNextDocId(docId);

        List<Object> values = lookup.source().getValues(mapper.names().fullName());
        if (values.isEmpty()) {
            return EMPTY_FIELDS;
        }
        Field[] fields = new Field[values.size()];
        for (int i = 0; i < values.size(); i++) {
            fields[i] = new Field(mapper.names().indexName(), values.get(i).toString(), Field.Store.NO, Field.Index.ANALYZED);
        }
        return fields;
    }

}
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.elasticsearch.ElasticSearchParseException;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.compress.lzf.LZFDecoder;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import org.elasticsearch.common.io.stream.CachedStreamInput;
Expand All @@ -34,12 +35,15 @@
import org.elasticsearch.index.mapper.SourceFieldSelector;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
* @author kimchy (shay.banon)
*/
// TODO: If we are processing it in the per hit fetch phase, we can initialize it with a source if it was loaded..
public class SourceLookup implements Map {

private IndexReader reader;
Expand Down Expand Up @@ -99,6 +103,54 @@ public void setNextDocId(int docId) {
this.source = null;
}

// Pre-compiled pattern matching a literal dot; used to split a path expression into its elements.
private final static Pattern dotPattern = Pattern.compile("\\.");

/**
 * Collects the "low" level values found in the source under the given dot separated path.
 * Arrays/lists encountered while navigating the path are traversed element by element.
 *
 * @param path the dot separated path expression to resolve against the source
 * @return the values found, empty when nothing matches
 */
public List<Object> getValues(String path) {
    final String[] elements = dotPattern.split(path);
    final List<Object> collected = Lists.newArrayList();
    getValues(collected, loadSourceIfNeeded(), elements, 0);
    return collected;
}

// Resolves the path element at index against the given map and either descends into the
// nested map/list found there (moving on to the next path element) or collects the leaf value.
@SuppressWarnings({"unchecked"})
private void getValues(List<Object> values, Map<String, Object> part, String[] pathElements, int index) {
    // all path elements were consumed without reaching a leaf value, nothing to collect
    if (index == pathElements.length) {
        return;
    }
    final Object child = part.get(pathElements[index]);
    if (child instanceof Map) {
        getValues(values, (Map<String, Object>) child, pathElements, index + 1);
        return;
    }
    if (child instanceof List) {
        getValues(values, (List<Object>) child, pathElements, index + 1);
        return;
    }
    // a missing (null) entry contributes nothing
    if (child != null) {
        values.add(child);
    }
}

// Walks every element of the list: nested maps and lists are navigated further with the
// same path index, null elements are skipped and any other element is collected as a value.
@SuppressWarnings({"unchecked"})
private void getValues(List<Object> values, List<Object> part, String[] pathElements, int index) {
    for (final Object element : part) {
        if (element instanceof Map) {
            getValues(values, (Map<String, Object>) element, pathElements, index);
        } else if (element instanceof List) {
            getValues(values, (List<Object>) element, pathElements, index);
        } else if (element != null) {
            values.add(element);
        }
    }
}

// Map view access: looks the key up in the source returned by loadSourceIfNeeded().
@Override public Object get(Object key) {
    final Map<String, Object> loaded = loadSourceIfNeeded();
    return loaded.get(key);
}
Expand Down
@@ -0,0 +1,104 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.test.integration.search.highlight;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;

/**
 * Integration test checking that highlighting works for fields that are not stored in the
 * mapping: the <tt>title</tt> field is mapped with "store" set to "no", and the nested
 * <tt>attachments.body</tt> field does not set "store" at all.
 *
 * @author kimchy (shay.banon)
 */
public class SourceFieldHighlightingTests extends AbstractNodesTests {

    private Client client;

    @BeforeClass public void createNodes() throws Exception {
        startNode("node1");
        client = getClient();
    }

    @AfterClass public void closeNodes() {
        client.close();
        closeAllNodes();
    }

    protected Client getClient() {
        return client("node1");
    }

    @Test public void testSourceLookupHighlighting() throws Exception {
        dropTestIndexQuietly();
        createTestIndex();
        indexTestDocuments();

        SearchResponse titleResponse = client.prepareSearch()
                .setQuery(fieldQuery("title", "bug"))
                .addHighlightedField("title", -1, 0)
                .execute().actionGet();
        assertFirstFragmentOfAllHits(titleResponse, "title",
                "This is a test on the highlighting <em>bug</em> present in elasticsearch");

        SearchResponse attachmentsResponse = client.prepareSearch()
                .setQuery(fieldQuery("attachments.body", "attachment"))
                .addHighlightedField("attachments.body", -1, 0)
                .execute().actionGet();
        assertFirstFragmentOfAllHits(attachmentsResponse, "attachments.body",
                "<em>attachment</em> 1 <em>attachment</em> 2");
    }

    // Deletes the "test" index, deliberately ignoring any failure of the delete call.
    private void dropTestIndexQuietly() {
        try {
            client.admin().indices().prepareDelete("test").execute().actionGet();
        } catch (Exception ignored) {
            // ignore
        }
    }

    // Creates the "test" index with two shards and the "type1" mapping used by the test.
    private void createTestIndex() throws Exception {
        client.admin().indices().prepareCreate("test")
                .setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
                .addMapping("type1", jsonBuilder()
                        .startObject()
                        .startObject("type1")
                        .startObject("properties")
                        // we don't store title, now lets see if it works...
                        .startObject("title")
                        .field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets")
                        .endObject()
                        .startObject("attachments")
                        .startObject("properties")
                        .startObject("body")
                        .field("type", "string").field("term_vector", "with_positions_offsets")
                        .endObject()
                        .endObject()
                        .endObject()
                        .endObject()
                        .endObject()
                        .endObject())
                .execute().actionGet();
    }

    // Indexes five identical documents (ids "0" to "4"), refreshing after each one.
    private void indexTestDocuments() throws Exception {
        for (int docNumber = 0; docNumber < 5; docNumber++) {
            client.prepareIndex("test", "type1", Integer.toString(docNumber))
                    .setSource(XContentFactory.jsonBuilder()
                            .startObject()
                            .field("title", "This is a test on the highlighting bug present in elasticsearch")
                            .startArray("attachments")
                            .startObject().field("body", "attachment 1").endObject()
                            .startObject().field("body", "attachment 2").endObject()
                            .endArray()
                            .endObject())
                    .setRefresh(true)
                    .execute().actionGet();
        }
    }

    // Expects all five documents to match and the first fragment of the given highlighted
    // field to equal the expected text on every hit.
    private void assertFirstFragmentOfAllHits(SearchResponse response, String field, String expectedFragment) {
        assertThat(response.hits().totalHits(), equalTo(5L));

        for (SearchHit hit : response.hits()) {
            assertThat(hit.highlightFields().get(field).fragments()[0], equalTo(expectedFragment));
        }
    }
}

0 comments on commit 216b2ab

Please sign in to comment.