Skip to content

Commit

Permalink
Add back not node (facebookincubator#228)
Browse files Browse the repository at this point in the history
* Add back not node.

* Fix NOT handling in metadata filter.

---------

Co-authored-by: Jimmy Lu <jimmylu@meta.com>
  • Loading branch information
2 people authored and zhejiangxiaomai committed Jun 26, 2023
1 parent 01734cc commit 45fe7ae
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 90 deletions.
81 changes: 40 additions & 41 deletions velox/dwio/common/MetadataFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ using LeafResults =
}

// Abstract base type for the nodes of the metadata filter tree. Concrete
// nodes visible in this listing (AndNode, OrNode, NotNode, LeafNode) derive
// from it and implement eval().
struct MetadataFilter::Node {
// Factory that turns a typed expression into a node tree. The definition
// further down returns nullptr when the expression is not a call or is one
// of the function names it explicitly declines to convert.
// NOTE(review): this listing is a rendered diff; the two-argument
// declaration below appears to be the pre-change form and the
// three-argument one its replacement - confirm against the real file.
static std::unique_ptr<Node> fromExpression(
ScanSpec&,
const core::ITypedExpr&);
// Same factory with an extra `negated` flag. In the definition the flag is
// flipped when a "not" call is encountered, swaps AND/OR construction, and
// is forwarded to exec::leafCallToSubfieldFilter for leaf calls.
static std::unique_ptr<Node>
fromExpression(ScanSpec&, const core::ITypedExpr&, bool negated);
virtual ~Node() = default;
// Evaluates this subtree against the supplied leaf results. NotNode::eval
// in this listing treats a nullptr return from its child as "no bits" and
// passes it through. The unit of `size` and the bit layout are not shown
// here - TODO confirm.
virtual uint64_t* eval(LeafResults&, int size) const = 0;
};
Expand Down Expand Up @@ -59,6 +58,18 @@ class MetadataFilter::LeafNode : public Node {
};

struct MetadataFilter::AndNode : Node {
// Builds the node for `lhs AND rhs`, tolerating missing operands.
//
// A null operand means no node was built for that side (fromExpression
// returns nullptr for expressions it does not convert). With both operands
// present an AndNode is allocated; with exactly one present that operand is
// returned as-is; with none present the result is nullptr.
static std::unique_ptr<Node> create(
    std::unique_ptr<Node> lhs,
    std::unique_ptr<Node> rhs) {
  if (lhs && rhs) {
    return std::make_unique<AndNode>(std::move(lhs), std::move(rhs));
  }
  if (lhs) {
    return lhs;
  }
  // Either only rhs is set, or both are null and nullptr is returned.
  return rhs;
}

// Takes ownership of both operand subtrees. The create() factory above only
// calls this when both operands are non-null.
AndNode(std::unique_ptr<Node> lhs, std::unique_ptr<Node> rhs)
: lhs_(std::move(lhs)), rhs_(std::move(rhs)) {}

Expand All @@ -81,6 +92,15 @@ struct MetadataFilter::AndNode : Node {
};

struct MetadataFilter::OrNode : Node {
// Builds the node for `lhs OR rhs`.
//
// Unlike the AND factory, a disjunction needs both sides: if either operand
// is null (no node could be built for it) the whole result is nullptr.
// Otherwise an OrNode owning both operands is returned.
static std::unique_ptr<Node> create(
    std::unique_ptr<Node> lhs,
    std::unique_ptr<Node> rhs) {
  if (lhs && rhs) {
    return std::make_unique<OrNode>(std::move(lhs), std::move(rhs));
  }
  return nullptr;
}

// Takes ownership of both operand subtrees. The create() factory above only
// calls this when both operands are non-null.
OrNode(std::unique_ptr<Node> lhs, std::unique_ptr<Node> rhs)
: lhs_(std::move(lhs)), rhs_(std::move(rhs)) {}

Expand All @@ -99,23 +119,6 @@ struct MetadataFilter::OrNode : Node {
std::unique_ptr<Node> rhs_;
};

// Node that inverts the result of a single child subtree.
//
// NOTE(review): in the commit this listing comes from, the header drops the
// `NotNode` forward declaration and "not" calls are handled by flipping a
// `negated` flag in Node::fromExpression instead - confirm whether this type
// is still in use before relying on it.
struct MetadataFilter::NotNode : Node {
  // Takes ownership of the subtree whose result will be inverted.
  explicit NotNode(std::unique_ptr<Node> negated)
      : negated_(std::move(negated)) {}

  // Evaluates the child and, when it yields a result, negates it in place
  // via bits::negate before handing the same pointer back. A nullptr from
  // the child is passed through unchanged.
  uint64_t* eval(LeafResults& leafResults, int size) const override {
    if (auto* childBits = negated_->eval(leafResults, size)) {
      bits::negate(reinterpret_cast<char*>(childBits), size);
      return childBits;
    }
    return nullptr;
  }

 private:
  std::unique_ptr<Node> negated_;
};

namespace {

const core::FieldAccessTypedExpr* asField(
Expand All @@ -133,40 +136,36 @@ const core::CallTypedExpr* asCall(const core::ITypedExpr* expr) {

std::unique_ptr<MetadataFilter::Node> MetadataFilter::Node::fromExpression(
ScanSpec& scanSpec,
const core::ITypedExpr& expr) {
const core::ITypedExpr& expr,
bool negated) {
auto* call = asCall(&expr);
if (!call) {
return nullptr;
}
if (call->name() == "and") {
auto lhs = fromExpression(scanSpec, *call->inputs()[0]);
auto rhs = fromExpression(scanSpec, *call->inputs()[1]);
if (!lhs) {
return rhs;
}
if (!rhs) {
return lhs;
}
return std::make_unique<AndNode>(std::move(lhs), std::move(rhs));
auto lhs = fromExpression(scanSpec, *call->inputs()[0], negated);
auto rhs = fromExpression(scanSpec, *call->inputs()[1], negated);
return negated ? OrNode::create(std::move(lhs), std::move(rhs))
: AndNode::create(std::move(lhs), std::move(rhs));
}
if (call->name() == "or") {
auto lhs = fromExpression(scanSpec, *call->inputs()[0]);
auto rhs = fromExpression(scanSpec, *call->inputs()[1]);
if (!lhs || !rhs) {
return nullptr;
}
return std::make_unique<OrNode>(std::move(lhs), std::move(rhs));
auto lhs = fromExpression(scanSpec, *call->inputs()[0], negated);
auto rhs = fromExpression(scanSpec, *call->inputs()[1], negated);
return negated ? AndNode::create(std::move(lhs), std::move(rhs))
: OrNode::create(std::move(lhs), std::move(rhs));
}
if (call->name() == "not") {
return fromExpression(scanSpec, *call->inputs()[0], !negated);
}
if (call->name() == "endswith" || call->name() == "contains" ||
call->name() == "like" || call->name() == "startswith" ||
call->name() == "in" || call->name() == "rlike" ||
call->name() == "isnotnull" || call->name() == "coalesce" ||
call->name() == "might_contain") {
call->name() == "rlike" || call->name() == "isnotnull" ||
call->name() == "coalesce" || call->name() == "might_contain") {
return nullptr;
}
try {
Subfield subfield;
auto filter = exec::leafCallToSubfieldFilter(*call, subfield);
auto filter = exec::leafCallToSubfieldFilter(*call, subfield, negated);
if (!filter) {
return nullptr;
}
Expand All @@ -180,7 +179,7 @@ std::unique_ptr<MetadataFilter::Node> MetadataFilter::Node::fromExpression(
}

MetadataFilter::MetadataFilter(ScanSpec& scanSpec, const core::ITypedExpr& expr)
: root_(Node::fromExpression(scanSpec, expr)) {}
: root_(Node::fromExpression(scanSpec, expr, false)) {}

void MetadataFilter::eval(
std::vector<std::pair<LeafNode*, std::vector<uint64_t>>>& leafNodeResults,
Expand Down
1 change: 0 additions & 1 deletion velox/dwio/common/MetadataFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ class MetadataFilter {
class Node;
class AndNode;
class OrNode;
class NotNode;

std::shared_ptr<Node> root_;
};
Expand Down
49 changes: 38 additions & 11 deletions velox/dwio/common/tests/E2EFilterTestBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,17 +429,17 @@ void E2EFilterTestBase::testMetadataFilterImpl(
int64_t originalIndex = 0;
auto nextExpectedIndex = [&]() -> int64_t {
for (;;) {
if (originalIndex >= batches.size() * kRowsInGroup) {
if (originalIndex >= batches.size() * batchSize_) {
return -1;
}
auto& batch = batches[originalIndex / kRowsInGroup];
auto& batch = batches[originalIndex / batchSize_];
auto vecA = batch->as<RowVector>()->childAt(0)->asFlatVector<int64_t>();
auto vecC = batch->as<RowVector>()
->childAt(1)
->as<RowVector>()
->childAt(0)
->asFlatVector<int64_t>();
auto j = originalIndex++ % kRowsInGroup;
auto j = originalIndex++ % batchSize_;
auto a = vecA->valueAt(j);
auto c = vecC->valueAt(j);
if (validationFilter(a, c)) {
Expand All @@ -451,8 +451,8 @@ void E2EFilterTestBase::testMetadataFilterImpl(
for (int i = 0; i < result->size(); ++i) {
auto totalIndex = nextExpectedIndex();
ASSERT_GE(totalIndex, 0);
auto& expected = batches[totalIndex / kRowsInGroup];
vector_size_t j = totalIndex % kRowsInGroup;
auto& expected = batches[totalIndex / batchSize_];
vector_size_t j = totalIndex % batchSize_;
ASSERT_TRUE(result->equalValueAt(expected.get(), i, j))
<< result->toString(i) << " vs " << expected->toString(j);
}
Expand All @@ -461,14 +461,20 @@ void E2EFilterTestBase::testMetadataFilterImpl(
}

void E2EFilterTestBase::testMetadataFilter() {
flushEveryNBatches_ = 1;
batchSize_ = 10;
test::VectorMaker vectorMaker(leafPool_.get());
functions::prestosql::registerAllScalarFunctions();
parse::registerTypeResolver();

// a: bigint, b: struct<c: bigint>
std::vector<RowVectorPtr> batches;
for (int i = 0; i < 10; ++i) {
auto a = BaseVector::create<FlatVector<int64_t>>(
BIGINT(), kRowsInGroup, leafPool_.get());
BIGINT(), batchSize_, leafPool_.get());
auto c = BaseVector::create<FlatVector<int64_t>>(
BIGINT(), kRowsInGroup, leafPool_.get());
for (int j = 0; j < kRowsInGroup; ++j) {
BIGINT(), batchSize_, leafPool_.get());
for (int j = 0; j < batchSize_; ++j) {
a->set(j, i);
c->set(j, i);
}
Expand All @@ -485,10 +491,8 @@ void E2EFilterTestBase::testMetadataFilter() {
a->size(),
std::vector<VectorPtr>({a, b})));
}
writeToMemory(batches[0]->type(), batches, true);
writeToMemory(batches[0]->type(), batches, false);

functions::prestosql::registerAllScalarFunctions();
parse::registerTypeResolver();
testMetadataFilterImpl(
batches,
common::Subfield("a"),
Expand All @@ -509,6 +513,29 @@ void E2EFilterTestBase::testMetadataFilter() {
nullptr,
"a in (1, 3, 8) or a >= 9",
[](int64_t a, int64_t) { return a == 1 || a == 3 || a == 8 || a >= 9; });
testMetadataFilterImpl(
batches,
common::Subfield("a"),
nullptr,
"not (a not in (2, 3, 5, 7))",
[](int64_t a, int64_t) {
return !!(a == 2 || a == 3 || a == 5 || a == 7);
});

{
SCOPED_TRACE("Values not unique in row group");
auto a = vectorMaker.flatVector<int64_t>(batchSize_, folly::identity);
auto c = vectorMaker.flatVector<int64_t>(batchSize_, folly::identity);
auto b = vectorMaker.rowVector({"c"}, {c});
batches = {vectorMaker.rowVector({"a", "b"}, {a, b})};
writeToMemory(batches[0]->type(), batches, false);
testMetadataFilterImpl(
batches,
common::Subfield("a"),
nullptr,
"not (a = 1 and b.c = 2)",
[](int64_t a, int64_t c) { return !(a == 1 && c == 2); });
}
}

void E2EFilterTestBase::testSubfieldsPruning() {
Expand Down
Loading

0 comments on commit 45fe7ae

Please sign in to comment.