Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made the UnifiedHighlighter's hasUnrecognizedQuery function processes FunctionQuery the same way as MatchAllDocsQuery and MatchNoDocsQuery queries for performance reasons. #13165

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Expand Up @@ -49,6 +49,7 @@
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
Expand Down Expand Up @@ -99,6 +100,9 @@ public class UnifiedHighlighter {

public static final int DEFAULT_CACHE_CHARS_THRESHOLD = 524288; // ~ 1 MB (2 byte chars)

public static final Set<Class<? extends Query>> QUERIES_WITH_NO_HL_EFFECT =
Set.of(MatchAllDocsQuery.class, MatchNoDocsQuery.class, FunctionQuery.class);

protected static final LabelledCharArrayMatcher[] ZERO_LEN_AUTOMATA_ARRAY =
new LabelledCharArrayMatcher[0];

Expand Down Expand Up @@ -1130,7 +1134,16 @@ public boolean acceptField(String field) {
@Override
public void visitLeaf(Query query) {
if (MultiTermHighlighting.canExtractAutomataFromLeafQuery(query) == false) {
if (!(query instanceof MatchAllDocsQuery || query instanceof MatchNoDocsQuery)) {
boolean no_effect_query = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be simpler as QUERIES_WITH_NO_HL_EFFECT.contains(query.getClass())

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you say that there is no need to capture hypothetical subclasses?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MatchAllDocsQuery is already final, but the other two aren't. Maybe this would be better done as a new protected method, isIgnorableQuery(Query q) with a default implementation of instanceof checks, and then if somebody has their own extension (or another query implementation that will do weird things here) they can override the method?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just edited my branch with your suggestion.

for (Class<? extends Query> queryType :
UnifiedHighlighter.QUERIES_WITH_NO_HL_EFFECT) {
if (queryType.isInstance(query)) {
no_effect_query = true;
break;
}
}

if (!no_effect_query) {
hasUnknownLeaf[0] = true;
}
}
Expand Down
Expand Up @@ -38,11 +38,15 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.ConstValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -1662,4 +1666,78 @@ public void testQueryWithLongTerm() throws IOException {

ir.close();
}

public void testPostingsOffsetStrategy() throws Exception {
if (this.fieldType.indexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
|| this.fieldType.storeTermVectors()) {
// ignore if fieldType is not POSTINGS only
return;
}

final UnifiedHighlighter.OffsetSource expectedOffsetSource;
if (this.fieldType.storeTermVectors()) {
expectedOffsetSource = UnifiedHighlighter.OffsetSource.POSTINGS_WITH_TERM_VECTORS;
} else {
expectedOffsetSource = UnifiedHighlighter.OffsetSource.POSTINGS;
}

RandomIndexWriter iw = newIndexOrderPreservingWriter();

Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);

body.setStringValue(
"This is a test. Just a test highlighting from postings. Feel free to ignore.");
iw.addDocument(doc);
body.setStringValue("Highlighting the first term. Hope it works.");
iw.addDocument(doc);

IndexReader ir = iw.getReader();
iw.close();

IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);

Set<Term> queryTerms = UnifiedHighlighter.extractTerms(query);
FieldHighlighter fieldHighlighter =
highlighter.getFieldHighlighter("body", query, queryTerms, 1);
assertEquals(
expectedOffsetSource,
fieldHighlighter
.getOffsetSource()); // TermQuery is compatible with POSTINGS offset strategy

String[] snippets = highlighter.highlight("body", query, topDocs);

for (Query noEffectQuery :
new Query[] {
new MatchAllDocsQuery(),
new MatchNoDocsQuery(),
new FunctionQuery(new ConstValueSource(5))
}) {
final Query booleanQuery =
new BooleanQuery.Builder()
.add(noEffectQuery, BooleanClause.Occur.MUST)
.add(query, BooleanClause.Occur.MUST)
.build();
queryTerms = UnifiedHighlighter.extractTerms(booleanQuery);
fieldHighlighter = highlighter.getFieldHighlighter("body", booleanQuery, queryTerms, 1);
assertEquals(
noEffectQuery.getClass().toString(),
expectedOffsetSource,
fieldHighlighter
.getOffsetSource()); // combining to a query with no effet (on highlighting) should
// lead to the same highlighter behavior

String[] bqSnippets = highlighter.highlight("body", query, topDocs);
assertArrayEquals(
Arrays.toString(bqSnippets),
snippets,
bqSnippets); // ensuring that the combined query does produce the same output
}

ir.close();
}
}