Skip to content

Commit

Permalink
Don't report terms as live if all it's docs are filtered out
Browse files Browse the repository at this point in the history
FilterableTermsEnum allows to filter stats by supplying per segment
bits. Today if all docs are filtered out the term is still reported as
live but shouldn't.

Relates to #6211
  • Loading branch information
s1monw committed May 19, 2014
1 parent 9e543b4 commit 6de8dd2
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 37 deletions.
Expand Up @@ -124,48 +124,50 @@ public BytesRef term() throws IOException {

@Override
public boolean seekExact(BytesRef text) throws IOException {
boolean found = false;
currentDocFreq = NOT_FOUND;
currentTotalTermFreq = NOT_FOUND;
int docFreq = 0;
long totalTermFreq = 0;
for (Holder anEnum : enums) {
if (!anEnum.termsEnum.seekExact(text)) {
continue;
}
found = true;
if (anEnum.bits == null) {
docFreq += anEnum.termsEnum.docFreq();
if (docsEnumFlag == DocsEnum.FLAG_FREQS) {
long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
totalTermFreq = -1;
continue;
}
totalTermFreq += leafTotalTermFreq;
}
} else {
DocsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.docs(anEnum.bits, anEnum.docsEnum, docsEnumFlag);
// 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
if (docsEnumFlag == DocsEnum.FLAG_FREQS) {
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
docFreq++;
// docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
// is really 1 or unrecorded when filtering like this
totalTermFreq += docsEnum.freq();
if (anEnum.termsEnum.seekExact(text)) {
if (anEnum.bits == null) {
docFreq += anEnum.termsEnum.docFreq();
if (docsEnumFlag == DocsEnum.FLAG_FREQS) {
long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
totalTermFreq = -1;
continue;
}
totalTermFreq += leafTotalTermFreq;
}
} else {
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
// docsEnum.freq() behaviour is undefined if docsEnumFlag==DocsEnum.FLAG_NONE so don't bother with call
docFreq++;
final DocsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.docs(anEnum.bits, anEnum.docsEnum, docsEnumFlag);
// 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
if (docsEnumFlag == DocsEnum.FLAG_FREQS) {
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
docFreq++;
// docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
// is really 1 or unrecorded when filtering like this
totalTermFreq += docsEnum.freq();
}
} else {
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
// docsEnum.freq() behaviour is undefined if docsEnumFlag==DocsEnum.FLAG_NONE so don't bother with call
docFreq++;
}
}
}
}
}
if (docFreq > 0) {
currentDocFreq = docFreq;
currentTotalTermFreq = totalTermFreq;
current = text;
return true;
} else {
currentDocFreq = NOT_FOUND;
currentTotalTermFreq = NOT_FOUND;
current = null;
return false;
}
return found;
}

@Override
Expand Down
Expand Up @@ -76,22 +76,18 @@ public boolean seekExact(BytesRef text) throws IOException {
boolean found = true;
if (needDocFreqs) {
currentDocFreq = termDocFreqs.get(currentTermOrd);
if (currentDocFreq == NOT_FOUND) {
found = false;
}
found = currentDocFreq != NOT_FOUND;
}
if (needTotalTermFreqs) {
currentTotalTermFreq = termsTotalFreqs.get(currentTermOrd);
if (currentTotalTermFreq == NOT_FOUND) {
found = false;
}
found = currentTotalTermFreq != NOT_FOUND;
}
current = found ? text : null;
return found;
}

//Cache miss - gather stats
boolean found = super.seekExact(text);
final boolean found = super.seekExact(text);

//Cache the result - found or not.
if (needDocFreqs) {
Expand Down

0 comments on commit 6de8dd2

Please sign in to comment.