Skip to content

Commit

Permalink
Resolves #1691: Lucene negative queries
Browse files Browse the repository at this point in the history
At root a new clause class that produces a negated boolean.
But since Lucene represents that as another Occur, the clause actually implements set subtraction.
The planner tries to maximize the cases where the subtraction is from a positive query rather than just *:*.
  • Loading branch information
MMcM committed Jun 3, 2022
1 parent c605c18 commit 410a9f5
Show file tree
Hide file tree
Showing 4 changed files with 259 additions and 2 deletions.
Expand Up @@ -52,6 +52,16 @@ public LuceneBooleanQuery(@Nonnull List<LuceneQueryClause> children, @Nonnull Bo
this.occur = occur;
}

@Nonnull
protected List<LuceneQueryClause> getChildren() {
return children;
}

@Nonnull
protected BooleanClause.Occur getOccur() {
return occur;
}

@Override
public Query bind(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @Nonnull EvaluationContext context) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
Expand Down
@@ -0,0 +1,104 @@
/*
* LuceneNotQuery.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2022 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.record.EvaluationContext;
import com.apple.foundationdb.record.PlanHashable;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreBase;
import com.apple.foundationdb.record.query.plan.cascades.explain.Attribute;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;

import javax.annotation.Nonnull;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Binder for a negation of clauses.
* Because of the way Lucene {@link BooleanQuery} works, this actually represents set subtraction,
* with a set of positive and negative clauses. For the same reason, there is no disjunctive analogue.
*/
@API(API.Status.UNSTABLE)
public class LuceneNotQuery extends LuceneBooleanQuery {
@Nonnull
private final List<LuceneQueryClause> negatedChildren;

public LuceneNotQuery(@Nonnull List<LuceneQueryClause> children, @Nonnull List<LuceneQueryClause> negatedChildren) {
super(children, BooleanClause.Occur.MUST);
this.negatedChildren = negatedChildren;
}

public LuceneNotQuery(@Nonnull LuceneQueryClause negatedChild) {
this(Collections.emptyList(), Collections.singletonList(negatedChild));
}

@Nonnull
protected List<LuceneQueryClause> getNegatedChildren() {
return negatedChildren;
}

@Override
public Query bind(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @Nonnull EvaluationContext context) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
if (getChildren().isEmpty()) {
// Lucene cannot handle all negated clauses.
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
} else {
for (LuceneQueryClause child : getChildren()) {
builder.add(child.bind(store, index, context), BooleanClause.Occur.MUST);
}
}
for (LuceneQueryClause child : negatedChildren) {
builder.add(child.bind(store, index, context), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
}

@Override
public void getPlannerGraphDetails(@Nonnull ImmutableList.Builder<String> detailsBuilder, @Nonnull ImmutableMap.Builder<String, Attribute> attributeMapBuilder) {
super.getPlannerGraphDetails(detailsBuilder, attributeMapBuilder);
for (LuceneQueryClause child : negatedChildren) {
child.getPlannerGraphDetails(detailsBuilder, attributeMapBuilder);
}
}

@Override
public int planHash(@Nonnull final PlanHashKind hashKind) {
return super.planHash() - PlanHashable.iterablePlanHash(hashKind, negatedChildren);
}

@Override
public String toString() {
return Stream.concat(
getChildren().stream().map(Objects::toString),
negatedChildren.stream().map(c -> "NOT " + c)
).collect(Collectors.joining(" AND "));
}
}
Expand Up @@ -37,6 +37,7 @@
import com.apple.foundationdb.record.query.expressions.ComponentWithSingleChild;
import com.apple.foundationdb.record.query.expressions.FieldWithComparison;
import com.apple.foundationdb.record.query.expressions.NestedField;
import com.apple.foundationdb.record.query.expressions.NotComponent;
import com.apple.foundationdb.record.query.expressions.OneOfThemWithComponent;
import com.apple.foundationdb.record.query.expressions.OrComponent;
import com.apple.foundationdb.record.query.expressions.QueryComponent;
Expand Down Expand Up @@ -206,6 +207,8 @@ private LuceneQueryClause getQueryForFilter(@Nonnull LucenePlanState state, @Non
return getQueryForLuceneComponent(state, (LuceneQueryComponent)filter, filterMask);
} else if (filter instanceof AndOrComponent) {
return getQueryForAndOr(state, (AndOrComponent) filter, parentFieldName, filterMask);
} else if (filter instanceof NotComponent) {
return getQueryForNot(state, (NotComponent) filter, parentFieldName, filterMask);
} else if (filter instanceof FieldWithComparison) {
return getQueryForFieldWithComparison(state, (FieldWithComparison) filter, parentFieldName, filterMask);
} else if (filter instanceof OneOfThemWithComponent) {
Expand Down Expand Up @@ -244,6 +247,8 @@ private LuceneQueryClause getQueryForAndOr(@Nonnull LucenePlanState state, @Nonn
final Iterator<FilterSatisfiedMask> subFilterMasks = filterMask != null ? filterMask.getChildren().iterator() : null;
final List<QueryComponent> filters = filter.getChildren();
final List<LuceneQueryClause> childClauses = new ArrayList<>(filters.size());
final List<LuceneQueryClause> negatedChildren = new ArrayList<>(0);
final BooleanClause.Occur occur = filter instanceof OrComponent ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
for (QueryComponent subFilter : filters) {
final FilterSatisfiedMask childMask = subFilterMasks != null ? subFilterMasks.next() : null;
LuceneQueryClause childClause = getQueryForFilter(state, subFilter, parentFieldName, childMask);
Expand All @@ -256,19 +261,79 @@ private LuceneQueryClause getQueryForAndOr(@Nonnull LucenePlanState state, @Nonn
if (childMask != null) {
childMask.setSatisfied(true);
}
childClauses.add(childClause);
if (childClause instanceof LuceneBooleanQuery && ((LuceneBooleanQuery)childClause).getOccur() == occur) {
childClauses.addAll(((LuceneBooleanQuery)childClause).getChildren());
if (childClause instanceof LuceneNotQuery) {
negatedChildren.addAll(((LuceneNotQuery)childClause).getNegatedChildren());
}
} else {
childClauses.add(childClause);
}
}
if (filterMask != null && filterMask.getUnsatisfiedFilters().isEmpty()) {
filterMask.setSatisfied(true);
}
if (!negatedChildren.isEmpty()) {
return new LuceneNotQuery(childClauses, negatedChildren);
}
// Don't do Lucene scan if none are satisfied, though.
if (childClauses.isEmpty()) {
return null;
}
final BooleanClause.Occur occur = filter instanceof OrComponent ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
return new LuceneBooleanQuery(childClauses, occur);
}

@Nullable
private LuceneQueryClause getQueryForNot(@Nonnull LucenePlanState state, @Nonnull NotComponent filter,
@Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterMask) {
final LuceneQueryClause childClause = getQueryForFilter(state, filter.getChild(), parentFieldName, filterMask == null ? null : filterMask.getChildren().get(0));
if (childClause == null) {
return null;
}
if (filterMask != null) {
filterMask.setSatisfied(true);
}
return negate(childClause);
}

@Nonnull
private static LuceneQueryClause negate(@Nonnull LuceneQueryClause clause) {
if (clause instanceof LuceneBooleanQuery) {
final LuceneBooleanQuery booleanQuery = (LuceneBooleanQuery)clause;
final BooleanClause.Occur occur = booleanQuery.getOccur();
if (occur == BooleanClause.Occur.MUST) {
List<LuceneQueryClause> clauses = new ArrayList<>();
for (LuceneQueryClause child : booleanQuery.getChildren()) {
clauses.add(negate(child));
}
if (clause instanceof LuceneNotQuery) {
LuceneNotQuery notQuery = (LuceneNotQuery)clause;
if (clauses.isEmpty() && notQuery.getNegatedChildren().size() == 1) {
return notQuery.getNegatedChildren().get(0);
}
clauses.addAll(notQuery.getNegatedChildren());
}
return new LuceneBooleanQuery(clauses, BooleanClause.Occur.SHOULD);
} else {
List<LuceneQueryClause> positive = new ArrayList<>();
List<LuceneQueryClause> negative = new ArrayList<>();
for (LuceneQueryClause child : booleanQuery.getChildren()) {
if (child instanceof LuceneBooleanQuery) {
positive.add(negate(child));
} else {
negative.add(child);
}
}
if (negative.isEmpty()) {
return new LuceneBooleanQuery(positive, BooleanClause.Occur.MUST);
} else {
return new LuceneNotQuery(positive, negative);
}
}
}
return new LuceneNotQuery(clause);
}

@Nullable
private LuceneQueryClause getQueryForFieldWithComparison(@Nonnull LucenePlanState state, @Nonnull FieldWithComparison filter,
@Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterSatisfiedMask) {
Expand Down
Expand Up @@ -20,6 +20,7 @@

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.record.EvaluationContext;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordMetaData;
import com.apple.foundationdb.record.RecordMetaDataBuilder;
Expand Down Expand Up @@ -55,6 +56,7 @@
import com.google.protobuf.Message;
import org.apache.commons.lang3.tuple.Pair;
import org.hamcrest.Matcher;
import org.hamcrest.Matchers;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
Expand Down Expand Up @@ -941,4 +943,80 @@ void fullGroupScan() throws Exception {
}
}

@Test
void andNot() throws Exception {
initializeFlat();
try (FDBRecordContext context = openContext()) {
openRecordStore(context);
final QueryComponent filter1 = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
final QueryComponent filter2 = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true);
RecordQuery query = RecordQuery.newBuilder()
.setRecordType(TextIndexTestUtils.SIMPLE_DOC)
.setFilter(Query.and(filter1, Query.not(filter2)))
.build();
RecordQueryPlan plan = planner.plan(query);
Matcher<RecordQueryPlan> matcher = indexScan(allOf(indexScan("Complex$text_index"),
indexScanType(LuceneScanTypes.BY_LUCENE),
scanParams(query(hasToString("MULTI Verona AND NOT MULTI traffic")))));
assertThat(plan, matcher);
assertThat(getLuceneQuery(plan), Matchers.hasToString("+(text:verona) -(text:traffic)"));
RecordCursor<FDBQueriedRecord<Message>> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan);
RecordCursor<Tuple> map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey);
List<Long> primaryKeys = map.map(t -> t.getLong(0)).asList().get();
assertEquals(Set.of(2L), Set.copyOf(primaryKeys));
}
}

@Test
void justNot() throws Exception {
initializeFlat();
try (FDBRecordContext context = openContext()) {
openRecordStore(context);
final QueryComponent filter = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
RecordQuery query = RecordQuery.newBuilder()
.setRecordType(TextIndexTestUtils.SIMPLE_DOC)
.setFilter(Query.not(filter))
.build();
RecordQueryPlan plan = planner.plan(query);
Matcher<RecordQueryPlan> matcher = indexScan(allOf(indexScan("Complex$text_index"),
indexScanType(LuceneScanTypes.BY_LUCENE),
scanParams(query(hasToString("NOT MULTI Verona")))));
assertThat(plan, matcher);
assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona)"));
RecordCursor<FDBQueriedRecord<Message>> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan);
RecordCursor<Tuple> map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey);
List<Long> primaryKeys = map.map(t -> t.getLong(0)).asList().get();
assertEquals(Set.of(0L, 1L, 3L, 5L), Set.copyOf(primaryKeys));
}
}

@Test
void notOr() throws Exception {
initializeFlat();
try (FDBRecordContext context = openContext()) {
openRecordStore(context);
final QueryComponent filter1 = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true);
final QueryComponent filter2 = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true);
RecordQuery query = RecordQuery.newBuilder()
.setRecordType(TextIndexTestUtils.SIMPLE_DOC)
.setFilter(Query.not(Query.or(filter1, filter2)))
.build();
RecordQueryPlan plan = planner.plan(query);
Matcher<RecordQueryPlan> matcher = indexScan(allOf(indexScan("Complex$text_index"),
indexScanType(LuceneScanTypes.BY_LUCENE),
scanParams(query(hasToString("NOT MULTI Verona AND NOT MULTI traffic")))));
assertThat(plan, matcher);
assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona) -(text:traffic)"));
RecordCursor<FDBQueriedRecord<Message>> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan);
RecordCursor<Tuple> map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey);
List<Long> primaryKeys = map.map(t -> t.getLong(0)).asList().get();
assertEquals(Set.of(0L, 1L, 3L), Set.copyOf(primaryKeys));
}
}

private org.apache.lucene.search.Query getLuceneQuery(RecordQueryPlan plan) {
LuceneIndexQueryPlan indexPlan = (LuceneIndexQueryPlan)plan;
LuceneScanQuery scan = (LuceneScanQuery)indexPlan.getScanParameters().bind(recordStore, recordStore.getRecordMetaData().getIndex(indexPlan.getIndexName()), EvaluationContext.EMPTY);
return scan.getQuery();
}
}

0 comments on commit 410a9f5

Please sign in to comment.