Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made the UnifiedHighlighter's hasUnrecognizedQuery function processes FunctionQuery the same way as MatchAllDocsQuery and MatchNoDocsQuery queries for performance reasons. #13165

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Expand Up @@ -49,6 +49,7 @@
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectors;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
Expand Down Expand Up @@ -1117,6 +1118,12 @@ protected UHComponents getHighlightComponents(String field, Query query, Set<Ter
highlightFlags);
}

protected boolean isIgnorableQuery(Query q) {
return q instanceof MatchAllDocsQuery
|| q instanceof MatchNoDocsQuery
|| q instanceof FunctionQuery;
}

protected boolean hasUnrecognizedQuery(Predicate<String> fieldMatcher, Query query) {
boolean[] hasUnknownLeaf = new boolean[1];
query.visit(
Expand All @@ -1130,7 +1137,7 @@ public boolean acceptField(String field) {
@Override
public void visitLeaf(Query query) {
if (MultiTermHighlighting.canExtractAutomataFromLeafQuery(query) == false) {
if (!(query instanceof MatchAllDocsQuery || query instanceof MatchNoDocsQuery)) {
if (!UnifiedHighlighter.this.isIgnorableQuery(query)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a test to TestUnifiedHighlighterExtensibility that checks that overrides to isIgnorableQuery work here as well? I think it will, but to be perfectly honest I can never remember the inheritance rules around class-specific this calls...

hasUnknownLeaf[0] = true;
}
}
Expand Down
Expand Up @@ -38,11 +38,15 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.ConstValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -1662,4 +1666,78 @@ public void testQueryWithLongTerm() throws IOException {

ir.close();
}

public void testPostingsOffsetStrategy() throws Exception {
if (this.fieldType.indexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
|| this.fieldType.storeTermVectors()) {
// ignore if fieldType is not POSTINGS only
return;
}

final UnifiedHighlighter.OffsetSource expectedOffsetSource;
if (this.fieldType.storeTermVectors()) {
expectedOffsetSource = UnifiedHighlighter.OffsetSource.POSTINGS_WITH_TERM_VECTORS;
} else {
expectedOffsetSource = UnifiedHighlighter.OffsetSource.POSTINGS;
}

RandomIndexWriter iw = newIndexOrderPreservingWriter();

Field body = new Field("body", "", fieldType);
Document doc = new Document();
doc.add(body);

body.setStringValue(
"This is a test. Just a test highlighting from postings. Feel free to ignore.");
iw.addDocument(doc);
body.setStringValue("Highlighting the first term. Hope it works.");
iw.addDocument(doc);

IndexReader ir = iw.getReader();
iw.close();

IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);

Set<Term> queryTerms = UnifiedHighlighter.extractTerms(query);
FieldHighlighter fieldHighlighter =
highlighter.getFieldHighlighter("body", query, queryTerms, 1);
assertEquals(
expectedOffsetSource,
fieldHighlighter
.getOffsetSource()); // TermQuery is compatible with POSTINGS offset strategy

String[] snippets = highlighter.highlight("body", query, topDocs);

for (Query noEffectQuery :
new Query[] {
new MatchAllDocsQuery(),
new MatchNoDocsQuery(),
new FunctionQuery(new ConstValueSource(5))
}) {
final Query booleanQuery =
new BooleanQuery.Builder()
.add(noEffectQuery, BooleanClause.Occur.MUST)
.add(query, BooleanClause.Occur.MUST)
.build();
queryTerms = UnifiedHighlighter.extractTerms(booleanQuery);
fieldHighlighter = highlighter.getFieldHighlighter("body", booleanQuery, queryTerms, 1);
assertEquals(
noEffectQuery.getClass().toString(),
expectedOffsetSource,
fieldHighlighter
.getOffsetSource()); // combining to a query with no effet (on highlighting) should
// lead to the same highlighter behavior

String[] bqSnippets = highlighter.highlight("body", query, topDocs);
assertArrayEquals(
Arrays.toString(bqSnippets),
snippets,
bqSnippets); // ensuring that the combined query does produce the same output
}

ir.close();
}
}