diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBooleanQuery.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBooleanQuery.java index 7ce242a964..05d90854cb 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBooleanQuery.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneBooleanQuery.java @@ -52,6 +52,16 @@ public LuceneBooleanQuery(@Nonnull List children, @Nonnull Bo this.occur = occur; } + @Nonnull + protected List getChildren() { + return children; + } + + @Nonnull + protected BooleanClause.Occur getOccur() { + return occur; + } + @Override public Query bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index index, @Nonnull EvaluationContext context) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneNotQuery.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneNotQuery.java new file mode 100644 index 0000000000..4057f66134 --- /dev/null +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneNotQuery.java @@ -0,0 +1,104 @@ +/* + * LuceneNotQuery.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2022 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.apple.foundationdb.record.lucene; + +import com.apple.foundationdb.annotation.API; +import com.apple.foundationdb.record.EvaluationContext; +import com.apple.foundationdb.record.PlanHashable; +import com.apple.foundationdb.record.metadata.Index; +import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreBase; +import com.apple.foundationdb.record.query.plan.cascades.explain.Attribute; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; + +import javax.annotation.Nonnull; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Binder for a negation of clauses. + * Because of the way Lucene {@link BooleanQuery} works, this actually represents set subtraction, + * with a set of positive and negative clauses. For the same reason, there is no disjunctive analogue. + */ +@API(API.Status.UNSTABLE) +public class LuceneNotQuery extends LuceneBooleanQuery { + @Nonnull + private final List negatedChildren; + + public LuceneNotQuery(@Nonnull List children, @Nonnull List negatedChildren) { + super(children, BooleanClause.Occur.MUST); + this.negatedChildren = negatedChildren; + } + + public LuceneNotQuery(@Nonnull LuceneQueryClause negatedChild) { + this(Collections.emptyList(), Collections.singletonList(negatedChild)); + } + + @Nonnull + protected List getNegatedChildren() { + return negatedChildren; + } + + @Override + public Query bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index index, @Nonnull EvaluationContext context) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + if (getChildren().isEmpty()) { + // Lucene cannot handle all negated clauses. + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + } else { + for (LuceneQueryClause child : getChildren()) { + builder.add(child.bind(store, index, context), BooleanClause.Occur.MUST); + } + } + for (LuceneQueryClause child : negatedChildren) { + builder.add(child.bind(store, index, context), BooleanClause.Occur.MUST_NOT); + } + return builder.build(); + } + + @Override + public void getPlannerGraphDetails(@Nonnull ImmutableList.Builder detailsBuilder, @Nonnull ImmutableMap.Builder attributeMapBuilder) { + super.getPlannerGraphDetails(detailsBuilder, attributeMapBuilder); + for (LuceneQueryClause child : negatedChildren) { + child.getPlannerGraphDetails(detailsBuilder, attributeMapBuilder); + } + } + + @Override + public int planHash(@Nonnull final PlanHashKind hashKind) { + return super.planHash() - PlanHashable.iterablePlanHash(hashKind, negatedChildren); + } + + @Override + public String toString() { + return Stream.concat( + getChildren().stream().map(Objects::toString), + negatedChildren.stream().map(c -> "NOT " + c) + ).collect(Collectors.joining(" AND ")); + } +} diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java index df142c2d13..e54723eef3 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LucenePlanner.java @@ -37,6 +37,7 @@ import com.apple.foundationdb.record.query.expressions.ComponentWithSingleChild; import com.apple.foundationdb.record.query.expressions.FieldWithComparison; import com.apple.foundationdb.record.query.expressions.NestedField; +import com.apple.foundationdb.record.query.expressions.NotComponent; import com.apple.foundationdb.record.query.expressions.OneOfThemWithComponent; import com.apple.foundationdb.record.query.expressions.OrComponent; import com.apple.foundationdb.record.query.expressions.QueryComponent; @@ -206,6 +207,8 @@ private LuceneQueryClause getQueryForFilter(@Nonnull LucenePlanState state, @Non return getQueryForLuceneComponent(state, (LuceneQueryComponent)filter, filterMask); } else if (filter instanceof AndOrComponent) { return getQueryForAndOr(state, (AndOrComponent) filter, parentFieldName, filterMask); + } else if (filter instanceof NotComponent) { + return getQueryForNot(state, (NotComponent) filter, parentFieldName, filterMask); } else if (filter instanceof FieldWithComparison) { return getQueryForFieldWithComparison(state, (FieldWithComparison) filter, parentFieldName, filterMask); } else if (filter instanceof OneOfThemWithComponent) { @@ -244,6 +247,8 @@ private LuceneQueryClause getQueryForAndOr(@Nonnull LucenePlanState state, @Nonn final Iterator subFilterMasks = filterMask != null ? filterMask.getChildren().iterator() : null; final List filters = filter.getChildren(); final List childClauses = new ArrayList<>(filters.size()); + final List negatedChildren = new ArrayList<>(0); + final BooleanClause.Occur occur = filter instanceof OrComponent ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST; for (QueryComponent subFilter : filters) { final FilterSatisfiedMask childMask = subFilterMasks != null ? subFilterMasks.next() : null; LuceneQueryClause childClause = getQueryForFilter(state, subFilter, parentFieldName, childMask); @@ -256,19 +261,79 @@ private LuceneQueryClause getQueryForAndOr(@Nonnull LucenePlanState state, @Nonn if (childMask != null) { childMask.setSatisfied(true); } - childClauses.add(childClause); + if (childClause instanceof LuceneBooleanQuery && ((LuceneBooleanQuery)childClause).getOccur() == occur) { + childClauses.addAll(((LuceneBooleanQuery)childClause).getChildren()); + if (childClause instanceof LuceneNotQuery) { + negatedChildren.addAll(((LuceneNotQuery)childClause).getNegatedChildren()); + } + } else { + childClauses.add(childClause); + } } if (filterMask != null && filterMask.getUnsatisfiedFilters().isEmpty()) { filterMask.setSatisfied(true); } + if (!negatedChildren.isEmpty()) { + return new LuceneNotQuery(childClauses, negatedChildren); + } // Don't do Lucene scan if none are satisfied, though. if (childClauses.isEmpty()) { return null; } - final BooleanClause.Occur occur = filter instanceof OrComponent ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST; return new LuceneBooleanQuery(childClauses, occur); } + @Nullable + private LuceneQueryClause getQueryForNot(@Nonnull LucenePlanState state, @Nonnull NotComponent filter, + @Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterMask) { + final LuceneQueryClause childClause = getQueryForFilter(state, filter.getChild(), parentFieldName, filterMask == null ? null : filterMask.getChildren().get(0)); + if (childClause == null) { + return null; + } + if (filterMask != null) { + filterMask.setSatisfied(true); + } + return negate(childClause); + } + + @Nonnull + private static LuceneQueryClause negate(@Nonnull LuceneQueryClause clause) { + if (clause instanceof LuceneBooleanQuery) { + final LuceneBooleanQuery booleanQuery = (LuceneBooleanQuery)clause; + final BooleanClause.Occur occur = booleanQuery.getOccur(); + if (occur == BooleanClause.Occur.MUST) { + List clauses = new ArrayList<>(); + for (LuceneQueryClause child : booleanQuery.getChildren()) { + clauses.add(negate(child)); + } + if (clause instanceof LuceneNotQuery) { + LuceneNotQuery notQuery = (LuceneNotQuery)clause; + if (clauses.isEmpty() && notQuery.getNegatedChildren().size() == 1) { + return notQuery.getNegatedChildren().get(0); + } + clauses.addAll(notQuery.getNegatedChildren()); + } + return new LuceneBooleanQuery(clauses, BooleanClause.Occur.SHOULD); + } else { + List positive = new ArrayList<>(); + List negative = new ArrayList<>(); + for (LuceneQueryClause child : booleanQuery.getChildren()) { + if (child instanceof LuceneBooleanQuery) { + positive.add(negate(child)); + } else { + negative.add(child); + } + } + if (negative.isEmpty()) { + return new LuceneBooleanQuery(positive, BooleanClause.Occur.MUST); + } else { + return new LuceneNotQuery(positive, negative); + } + } + } + return new LuceneNotQuery(clause); + } + @Nullable private LuceneQueryClause getQueryForFieldWithComparison(@Nonnull LucenePlanState state, @Nonnull FieldWithComparison filter, @Nullable String parentFieldName, @Nullable FilterSatisfiedMask filterSatisfiedMask) { diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/FDBLuceneQueryTest.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/FDBLuceneQueryTest.java index 02c0c3f88a..bf3289fc7b 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/FDBLuceneQueryTest.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/FDBLuceneQueryTest.java @@ -20,6 +20,7 @@ package com.apple.foundationdb.record.lucene; +import com.apple.foundationdb.record.EvaluationContext; import com.apple.foundationdb.record.RecordCursor; import com.apple.foundationdb.record.RecordMetaData; import com.apple.foundationdb.record.RecordMetaDataBuilder; @@ -55,6 +56,7 @@ import com.google.protobuf.Message; import org.apache.commons.lang3.tuple.Pair; import org.hamcrest.Matcher; +import org.hamcrest.Matchers; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -941,4 +943,80 @@ void fullGroupScan() throws Exception { } } + @Test + void andNot() throws Exception { + initializeFlat(); + try (FDBRecordContext context = openContext()) { + openRecordStore(context); + final QueryComponent filter1 = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true); + final QueryComponent filter2 = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true); + RecordQuery query = RecordQuery.newBuilder() + .setRecordType(TextIndexTestUtils.SIMPLE_DOC) + .setFilter(Query.and(filter1, Query.not(filter2))) + .build(); + RecordQueryPlan plan = planner.plan(query); + Matcher matcher = indexScan(allOf(indexScan("Complex$text_index"), + indexScanType(LuceneScanTypes.BY_LUCENE), + scanParams(query(hasToString("MULTI Verona AND NOT MULTI traffic"))))); + assertThat(plan, matcher); + assertThat(getLuceneQuery(plan), Matchers.hasToString("+(text:verona) -(text:traffic)")); + RecordCursor> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan); + RecordCursor map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey); + List primaryKeys = map.map(t -> t.getLong(0)).asList().get(); + assertEquals(Set.of(2L), Set.copyOf(primaryKeys)); + } + } + + @Test + void justNot() throws Exception { + initializeFlat(); + try (FDBRecordContext context = openContext()) { + openRecordStore(context); + final QueryComponent filter = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true); + RecordQuery query = RecordQuery.newBuilder() + .setRecordType(TextIndexTestUtils.SIMPLE_DOC) + .setFilter(Query.not(filter)) + .build(); + RecordQueryPlan plan = planner.plan(query); + Matcher matcher = indexScan(allOf(indexScan("Complex$text_index"), + indexScanType(LuceneScanTypes.BY_LUCENE), + scanParams(query(hasToString("NOT MULTI Verona"))))); + assertThat(plan, matcher); + assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona)")); + RecordCursor> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan); + RecordCursor map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey); + List primaryKeys = map.map(t -> t.getLong(0)).asList().get(); + assertEquals(Set.of(0L, 1L, 3L, 5L), Set.copyOf(primaryKeys)); + } + } + + @Test + void notOr() throws Exception { + initializeFlat(); + try (FDBRecordContext context = openContext()) { + openRecordStore(context); + final QueryComponent filter1 = new LuceneQueryComponent("Verona", Lists.newArrayList("text"), true); + final QueryComponent filter2 = new LuceneQueryComponent("traffic", Lists.newArrayList("text"), true); + RecordQuery query = RecordQuery.newBuilder() + .setRecordType(TextIndexTestUtils.SIMPLE_DOC) + .setFilter(Query.not(Query.or(filter1, filter2))) + .build(); + RecordQueryPlan plan = planner.plan(query); + Matcher matcher = indexScan(allOf(indexScan("Complex$text_index"), + indexScanType(LuceneScanTypes.BY_LUCENE), + scanParams(query(hasToString("NOT MULTI Verona AND NOT MULTI traffic"))))); + assertThat(plan, matcher); + assertThat(getLuceneQuery(plan), Matchers.hasToString("+*:* -(text:verona) -(text:traffic)")); + RecordCursor> fdbQueriedRecordRecordCursor = recordStore.executeQuery(plan); + RecordCursor map = fdbQueriedRecordRecordCursor.map(FDBQueriedRecord::getPrimaryKey); + List primaryKeys = map.map(t -> t.getLong(0)).asList().get(); + assertEquals(Set.of(0L, 1L, 3L), Set.copyOf(primaryKeys)); + } + } + + private org.apache.lucene.search.Query getLuceneQuery(RecordQueryPlan plan) { + LuceneIndexQueryPlan indexPlan = (LuceneIndexQueryPlan)plan; + LuceneScanQuery scan = (LuceneScanQuery)indexPlan.getScanParameters().bind(recordStore, recordStore.getRecordMetaData().getIndex(indexPlan.getIndexName()), EvaluationContext.EMPTY); + return scan.getQuery(); + } }