apache · vletard · Dec 13, 2023 · Dec 13, 2023 · Mar 6, 2024 · Mar 6, 2024
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -49,6 +49,7 @@
 import org.apache.lucene.index.StoredFields;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermVectors;
+import org.apache.lucene.queries.function.FunctionQuery;
 import org.apache.lucene.queries.spans.SpanQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
@@ -99,6 +100,9 @@ public class UnifiedHighlighter {
 
   public static final int DEFAULT_CACHE_CHARS_THRESHOLD = 524288; // ~ 1 MB (2 byte chars)
 
+  /** Leaf query types (subclasses included) that visitLeaf never flags as an unknown leaf. */
+  public static final Set<Class<? extends Query>> QUERIES_WITH_NO_HL_EFFECT =
+      Set.of(MatchAllDocsQuery.class, MatchNoDocsQuery.class, FunctionQuery.class);
   protected static final LabelledCharArrayMatcher[] ZERO_LEN_AUTOMATA_ARRAY =
       new LabelledCharArrayMatcher[0];
 
@@ -1130,7 +1134,16 @@ public boolean acceptField(String field) {
           @Override
           public void visitLeaf(Query query) {
             if (MultiTermHighlighting.canExtractAutomataFromLeafQuery(query) == false) {
-              if (!(query instanceof MatchAllDocsQuery || query instanceof MatchNoDocsQuery)) {
+              // Mark the leaf unknown unless its type is listed in QUERIES_WITH_NO_HL_EFFECT.
+              boolean noEffectQuery = false;
+              for (Class<? extends Query> queryType :
+                  UnifiedHighlighter.QUERIES_WITH_NO_HL_EFFECT) {
+                if (queryType.isInstance(query)) {
+                  noEffectQuery = true;
+                  break;
+                }
+              }
+              if (!noEffectQuery) {
                 hasUnknownLeaf[0] = true;
               }
             }

diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -38,11 +38,15 @@
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.function.FunctionQuery;
+import org.apache.lucene.queries.function.valuesource.ConstValueSource;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
@@ -1662,4 +1666,78 @@ public void testQueryWithLongTerm() throws IOException {
 
     ir.close();
   }
+
+  /**
+   * Checks that a postings-only field keeps the POSTINGS offset source when a TermQuery is
+   * combined (as MUST clauses) with a MatchAllDocsQuery, MatchNoDocsQuery or FunctionQuery.
+   */
+  public void testPostingsOffsetStrategy() throws Exception {
+    if (this.fieldType.indexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
+        || this.fieldType.storeTermVectors()) {
+      // ignore if fieldType is not POSTINGS only
+      return;
+    }
+
+    // storeTermVectors() is false past the guard above, so POSTINGS is always expected.
+    final UnifiedHighlighter.OffsetSource expectedOffsetSource =
+        UnifiedHighlighter.OffsetSource.POSTINGS;
+
+    RandomIndexWriter iw = newIndexOrderPreservingWriter();
+
+    Field body = new Field("body", "", fieldType);
+    Document doc = new Document();
+    doc.add(body);
+
+    body.setStringValue(
+        "This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    iw.addDocument(doc);
+    body.setStringValue("Highlighting the first term. Hope it works.");
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+
+    Set<Term> queryTerms = UnifiedHighlighter.extractTerms(query);
+    FieldHighlighter fieldHighlighter =
+        highlighter.getFieldHighlighter("body", query, queryTerms, 1);
+    // TermQuery is compatible with POSTINGS offset strategy
+    assertEquals(expectedOffsetSource, fieldHighlighter.getOffsetSource());
+
+    String[] snippets = highlighter.highlight("body", query, topDocs);
+
+    for (Query noEffectQuery :
+        new Query[] {
+          new MatchAllDocsQuery(),
+          new MatchNoDocsQuery(),
+          new FunctionQuery(new ConstValueSource(5))
+        }) {
+      final Query booleanQuery =
+          new BooleanQuery.Builder()
+              .add(noEffectQuery, BooleanClause.Occur.MUST)
+              .add(query, BooleanClause.Occur.MUST)
+              .build();
+      queryTerms = UnifiedHighlighter.extractTerms(booleanQuery);
+      fieldHighlighter = highlighter.getFieldHighlighter("body", booleanQuery, queryTerms, 1);
+      // combining with a query that has no effect (on highlighting) should lead to the same
+      // highlighter behavior
+      assertEquals(
+          noEffectQuery.getClass().toString(),
+          expectedOffsetSource,
+          fieldHighlighter.getOffsetSource());
+
+      // NOTE(review): this call passes `query`, not `booleanQuery`, so it repeats the call that
+      // produced `snippets`. Confirm whether highlighting `booleanQuery` was intended; a MUST
+      // MatchNoDocsQuery clause may change the output in modes that rely on query matches.
+      String[] bqSnippets = highlighter.highlight("body", query, topDocs);
+      // ensuring that the combined query does produce the same output
+      assertArrayEquals(Arrays.toString(bqSnippets), snippets, bqSnippets);
+    }
+
+    ir.close();
+  }
 }