Skip to content

Commit

Permalink
inner_hits: Don't use bitset cache for children filters.
Browse files Browse the repository at this point in the history
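Only parent filters should use the bitset filter cache; caching bitsets for children filters wastes memory.
Also, when a nested object is located inside a plain object field, inline the object field's name into the nested identity's field path (e.g. `comments.messages`),
instead of creating an additional (dummy) nested identity for the object field.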

Closes elastic#10662
Closes elastic#10629
  • Loading branch information
martijnvg committed Apr 30, 2015
1 parent cb2f3aa commit bd70e38
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 24 deletions.
25 changes: 20 additions & 5 deletions src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java
Expand Up @@ -27,9 +27,10 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.ElasticsearchGenerationException;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Booleans;
Expand All @@ -45,14 +46,14 @@
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.*;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.cache.fixedbitset.FixedBitSetFilterCache;
import org.elasticsearch.index.mapper.internal.*;
import org.elasticsearch.index.mapper.object.ObjectMapper;
import org.elasticsearch.index.mapper.object.RootObjectMapper;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptService.ScriptType;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.*;
Expand Down Expand Up @@ -598,15 +599,29 @@ public ParsedDocument parse(SourceToParse source, @Nullable ParseListener listen
/**
* Returns the best matching nested {@link ObjectMapper} instance that is in the scope of the specified nested docId.
*/
public ObjectMapper findNestedObjectMapper(int nestedDocId, FixedBitSetFilterCache cache, AtomicReaderContext context) throws IOException {
public ObjectMapper findNestedObjectMapper(int nestedDocId, SearchContext sc, AtomicReaderContext context) throws IOException {
ObjectMapper nestedObjectMapper = null;
for (ObjectMapper objectMapper : objectMappers().values()) {
if (!objectMapper.nested().isNested()) {
continue;
}

FixedBitSet nestedTypeBitSet = cache.getFixedBitSetFilter(objectMapper.nestedTypeFilter()).getDocIdSet(context, null);
if (nestedTypeBitSet != null && nestedTypeBitSet.get(nestedDocId)) {
Filter filter = sc.filterCache().cache(objectMapper.nestedTypeFilter());
if (filter == null) {
continue;
}
// We can pass down 'null' as acceptedDocs, because nestedDocId is a doc to be fetched and
// is therefore guaranteed to be a live doc.
DocIdSet nestedTypeSet = filter.getDocIdSet(context, null);
if (nestedTypeSet == null) {
continue;
}
DocIdSetIterator iterator = nestedTypeSet.iterator();
if (iterator == null) {
continue;
}

if (iterator.advance(nestedDocId) == nestedDocId) {
if (nestedObjectMapper == null) {
nestedObjectMapper = objectMapper;
} else {
Expand Down
48 changes: 34 additions & 14 deletions src/main/java/org/elasticsearch/search/fetch/FetchPhase.java
Expand Up @@ -23,6 +23,8 @@
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
Expand Down Expand Up @@ -281,7 +283,7 @@ private InternalSearchHit createNestedSearchHit(SearchContext context, int neste
context.lookup().setNextReader(subReaderContext);
context.lookup().setNextDocId(nestedSubDocId);

ObjectMapper nestedObjectMapper = documentMapper.findNestedObjectMapper(nestedSubDocId, context.fixedBitSetFilterCache(), subReaderContext);
ObjectMapper nestedObjectMapper = documentMapper.findNestedObjectMapper(nestedSubDocId, context, subReaderContext);
assert nestedObjectMapper != null;
InternalSearchHit.InternalNestedIdentity nestedIdentity = getInternalNestedIdentity(context, nestedSubDocId, subReaderContext, documentMapper, nestedObjectMapper);

Expand Down Expand Up @@ -368,36 +370,54 @@ private Map<String, SearchHitField> getSearchFields(SearchContext context, int n
private InternalSearchHit.InternalNestedIdentity getInternalNestedIdentity(SearchContext context, int nestedSubDocId, AtomicReaderContext subReaderContext, DocumentMapper documentMapper, ObjectMapper nestedObjectMapper) throws IOException {
int currentParent = nestedSubDocId;
ObjectMapper nestedParentObjectMapper;
StringBuilder field = new StringBuilder();
ObjectMapper current = nestedObjectMapper;
InternalSearchHit.InternalNestedIdentity nestedIdentity = null;
do {
String field;
Filter parentFilter;
nestedParentObjectMapper = documentMapper.findParentObjectMapper(nestedObjectMapper);
nestedParentObjectMapper = documentMapper.findParentObjectMapper(current);
if (field.length() != 0) {
field.insert(0, '.');
}
field.insert(0, current.name());
if (nestedParentObjectMapper != null) {
field = nestedObjectMapper.name();
if (!nestedParentObjectMapper.nested().isNested()) {
nestedObjectMapper = nestedParentObjectMapper;
// all right, the parent is a normal object field, so this is the best identity we can give for that:
nestedIdentity = new InternalSearchHit.InternalNestedIdentity(field, 0, nestedIdentity);
if (nestedParentObjectMapper.nested().isNested() == false) {
current = nestedParentObjectMapper;
continue;
}
parentFilter = nestedParentObjectMapper.nestedTypeFilter();
} else {
field = nestedObjectMapper.fullPath();
parentFilter = NonNestedDocsFilter.INSTANCE;
}

Filter childFilter = context.filterCache().cache(nestedObjectMapper.nestedTypeFilter());
if (childFilter == null) {
current = nestedParentObjectMapper;
continue;
}
// We can pass down 'null' as acceptedDocs, because we're fetching a docId that matched in the query phase.
DocIdSet childDocSet = childFilter.getDocIdSet(subReaderContext, null);
if (childDocSet == null) {
current = nestedParentObjectMapper;
continue;
}
DocIdSetIterator childIter = childDocSet.iterator();
if (childIter == null) {
current = nestedParentObjectMapper;
continue;
}

FixedBitSet parentBitSet = context.fixedBitSetFilterCache().getFixedBitSetFilter(parentFilter).getDocIdSet(subReaderContext, null);
int offset = 0;
FixedBitSet nestedDocsBitSet = context.fixedBitSetFilterCache().getFixedBitSetFilter(nestedObjectMapper.nestedTypeFilter()).getDocIdSet(subReaderContext, null);
int nextParent = parentBitSet.nextSetBit(currentParent);
for (int docId = nestedDocsBitSet.nextSetBit(currentParent + 1); docId < nextParent && docId != -1; docId = nestedDocsBitSet.nextSetBit(docId + 1)) {
for (int docId = childIter.advance(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS; docId = childIter.nextDoc()) {
offset++;
}
currentParent = nextParent;
nestedObjectMapper = nestedParentObjectMapper;
nestedIdentity = new InternalSearchHit.InternalNestedIdentity(field, offset, nestedIdentity);
} while (nestedParentObjectMapper != null);
current = nestedObjectMapper = nestedParentObjectMapper;
nestedIdentity = new InternalSearchHit.InternalNestedIdentity(field.toString(), offset, nestedIdentity);
field = new StringBuilder();
} while (current != null);
return nestedIdentity;
}

Expand Down
Expand Up @@ -865,7 +865,12 @@ public void testInnerHitsWithObjectFieldThatHasANestedField() throws Exception {
List<IndexRequestBuilder> requests = new ArrayList<>();
requests.add(client().prepareIndex("articles", "article", "1").setSource(jsonBuilder().startObject()
.field("title", "quick brown fox")
.startObject("comments").startObject("messages").field("message", "fox eat quick").endObject().endObject()
.startObject("comments")
.startArray("messages")
.startObject().field("message", "fox eat quick").endObject()
.startObject().field("message", "bear eat quick").endObject()
.endArray()
.endObject()
.endObject()));
indexRandom(true, requests);

Expand All @@ -877,11 +882,40 @@ public void testInnerHitsWithObjectFieldThatHasANestedField() throws Exception {
assertThat(response.getHits().getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getTotalHits(), equalTo(1l));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getField().string(), equalTo("comments"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getField().string(), equalTo("comments.messages"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getOffset(), equalTo(0));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild(), nullValue());

response = client().prepareSearch("articles")
.setQuery(nestedQuery("comments.messages", matchQuery("comments.messages.message", "bear")).innerHit(new QueryInnerHitBuilder()))
.get();
assertNoFailures(response);
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getTotalHits(), equalTo(1l));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getField().string(), equalTo("comments.messages"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getOffset(), equalTo(1));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild(), nullValue());

// index the message in an object form instead of an array
requests = new ArrayList<>();
requests.add(client().prepareIndex("articles", "article", "1").setSource(jsonBuilder().startObject()
.field("title", "quick brown fox")
.startObject("comments").startObject("messages").field("message", "fox eat quick").endObject().endObject()
.endObject()));
indexRandom(true, requests);
response = client().prepareSearch("articles")
.setQuery(nestedQuery("comments.messages", matchQuery("comments.messages.message", "fox")).innerHit(new QueryInnerHitBuilder()))
.get();
assertNoFailures(response);
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getTotalHits(), equalTo(1l));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).id(), equalTo("1"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getField().string(), equalTo("comments.messages"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getOffset(), equalTo(0));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild().getField().string(), equalTo("messages"));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild().getOffset(), equalTo(0));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild().getChild(), nullValue());
assertThat(response.getHits().getAt(0).getInnerHits().get("comments.messages").getAt(0).getNestedIdentity().getChild(), nullValue());
}

}

0 comments on commit bd70e38

Please sign in to comment.