Skip to content

Commit

Permalink
Replace coalesce function with a special form (facebookincubator#301)
Browse files Browse the repository at this point in the history
Summary:
X-link: pytorch/torcharrow#301

Coalesce needs to be a special form expression and cannot be a function because
it is not supposed to evaluate remaining inputs once there are no nulls. For
example, if column a has no nulls, the following expression should not try to
evaluate `b / 0` and should not fail.

```
coalesce(a, b / 0)
```

This change introduces CoalesceExpr special form and deletes coalesce function.

Also, fix a crash due to null pointer dereference in FlatVector::mutableRawValues.

Pull Request resolved: facebookincubator#1430

Reviewed By: laithsakka

Differential Revision: D35674128

Pulled By: mbasmanova

fbshipit-source-id: 2d16cb3404843322da67b699e7163d53757225bd
  • Loading branch information
mbasmanova authored and Arty-Maly committed May 13, 2022
1 parent 5f20256 commit 0e5431e
Show file tree
Hide file tree
Showing 15 changed files with 150 additions and 114 deletions.
6 changes: 6 additions & 0 deletions velox/docs/develop/expression-evaluation.rst
Expand Up @@ -79,6 +79,12 @@ try
Handles errors generated by the input expression by returning nulls for the
corresponding rows.

coalesce
COALESCE expression. Takes multiple input expressions of the same type.

Returns the first non-null value in the argument list. Like an IF or SWITCH
expression, arguments are only evaluated if necessary.

When evaluating AND and OR expressions, the engine adaptively reorders the
conjuncts to evaluate the cheapest most decisive conjuncts first. E.g. the AND
expression chooses to evaluate the cheapest conjunct that returns FALSE most
Expand Down
1 change: 1 addition & 0 deletions velox/expression/CMakeLists.txt
Expand Up @@ -20,6 +20,7 @@ target_link_libraries(velox_expression_functions velox_common_base)
add_library(
velox_expression
CastExpr.cpp
CoalesceExpr.cpp
ControlExpr.cpp
EvalCtx.cpp
Expr.cpp
Expand Down
66 changes: 66 additions & 0 deletions velox/expression/CoalesceExpr.cpp
@@ -0,0 +1,66 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "velox/expression/CoalesceExpr.h"
#include "velox/expression/VarSetter.h"

namespace facebook::velox::exec {

CoalesceExpr::CoalesceExpr(TypePtr type, std::vector<ExprPtr>&& inputs)
: SpecialForm(std::move(type), std::move(inputs), kCoalesce) {
for (auto i = 1; i < inputs_.size(); i++) {
VELOX_USER_CHECK_EQ(
inputs_[0]->type()->kind(),
inputs_[i]->type()->kind(),
"Inputs to coalesce must have the same type");
}
}

void CoalesceExpr::evalSpecialForm(
const SelectivityVector& rows,
EvalCtx* context,
VectorPtr* result) {
// Make sure to include current expression in the error message in case of an
// exception.
ExceptionContextSetter exceptionContext(
{[](auto* expr) { return static_cast<Expr*>(expr)->toString(); }, this});

// Null positions to populate.
exec::LocalSelectivityVector activeRowsHolder(context, rows.end());
auto activeRows = activeRowsHolder.get();
*activeRows = rows;

// Fix finalSelection at "rows" unless already fixed.
VarSetter finalSelection(
context->mutableFinalSelection(), &rows, context->isFinalSelection());
VarSetter isFinalSelection(context->mutableIsFinalSelection(), false);

for (int i = 0; i < inputs_.size(); i++) {
inputs_[i]->eval(*activeRows, context, result);

const uint64_t* rawNulls = (*result)->flatRawNulls(*activeRows);
if (!rawNulls) {
// No nulls left.
return;
}

activeRows->deselectNonNulls(rawNulls, 0, activeRows->end());
if (!activeRows->hasSelections()) {
// No nulls left.
return;
}
}
}
} // namespace facebook::velox::exec
37 changes: 37 additions & 0 deletions velox/expression/CoalesceExpr.h
@@ -0,0 +1,37 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "velox/expression/ControlExpr.h"

namespace facebook::velox::exec {

const char* const kCoalesce = "coalesce";

class CoalesceExpr : public SpecialForm {
public:
CoalesceExpr(TypePtr type, std::vector<ExprPtr>&& inputs);

void evalSpecialForm(
const SelectivityVector& rows,
EvalCtx* context,
VectorPtr* result) override;

bool propagatesNulls() const override {
return false;
}
};
} // namespace facebook::velox::exec
5 changes: 4 additions & 1 deletion velox/expression/ExprCompiler.cpp
Expand Up @@ -15,8 +15,8 @@
*/

#include "velox/expression/ExprCompiler.h"
#include "velox/core/SimpleFunctionMetadata.h"
#include "velox/expression/CastExpr.h"
#include "velox/expression/CoalesceExpr.h"
#include "velox/expression/ControlExpr.h"
#include "velox/expression/Expr.h"
#include "velox/expression/SimpleFunctionRegistry.h"
Expand Down Expand Up @@ -193,6 +193,9 @@ ExprPtr getSpecialForm(
VELOX_CHECK_EQ(compiledChildren.size(), 1);
return std::make_shared<TryExpr>(type, std::move(compiledChildren[0]));
}
if (name == kCoalesce) {
return std::make_shared<CoalesceExpr>(type, std::move(compiledChildren));
}
return nullptr;
}

Expand Down
1 change: 1 addition & 0 deletions velox/expression/tests/CMakeLists.txt
Expand Up @@ -17,6 +17,7 @@ add_executable(
ExprTest.cpp
ExprStatsTest.cpp
CastExprTest.cpp
CoalesceTest.cpp
MapWriterTest.cpp
ArrayWriterTest.cpp
RowWriterTest.cpp
Expand Down
Expand Up @@ -39,23 +39,36 @@ TEST_F(CoalesceTest, basic) {

auto row = makeRowVector({first, second, third});
auto result = evaluate<FlatVector<int32_t>>("coalesce(c0, c1, c2)", row);
for (int i = 0; i < size; ++i) {
EXPECT_EQ(result->valueAt(i), i * pow(10, i % 3)) << "at " << i;
}
auto expectedResult = makeFlatVector<int32_t>(
size, [](auto row) { return row * pow(10, row % 3); });
assertEqualVectors(expectedResult, result);

// Verify that input expressions are evaluated only on rows that are still
// null after evaluating all the preceding inputs and not evaluated at all if
// there are no nulls remaining.

// The last expression 'c1 / 0' should not be evaluated.
result = evaluate<FlatVector<int32_t>>(
"coalesce(c0, c1, c2, cast(c1 / 0 as integer))", row);
assertEqualVectors(expectedResult, result);

// The second expression 'c1 / (c1 % 3)' should not be evaluated on rows where
// c1 % 3 is zero.
result = evaluate<FlatVector<int32_t>>(
"coalesce(c0, cast(c1 / (c1 % 3) as integer), c2)", row);
assertEqualVectors(expectedResult, result);

result = evaluate<FlatVector<int32_t>>("coalesce(c2, c1, c0)", row);
for (int i = 0; i < size; ++i) {
EXPECT_EQ(result->valueAt(i), i * 100) << "at " << i;
}
expectedResult =
makeFlatVector<int32_t>(size, [](auto row) { return row * 100; });
assertEqualVectors(expectedResult, result);

result = evaluate<FlatVector<int32_t>>("coalesce(c0, c1)", row);
for (int i = 0; i < size; ++i) {
if (i % 3 == 2) {
EXPECT_TRUE(result->isNullAt(i));
} else {
EXPECT_EQ(result->valueAt(i), i * pow(10, i % 3)) << "at " << i;
}
}
expectedResult = makeFlatVector<int32_t>(
size,
[](auto row) { return row * pow(10, row % 3); },
[](auto row) { return row % 3 == 2; });
assertEqualVectors(expectedResult, result);
}

TEST_F(CoalesceTest, strings) {
Expand Down
12 changes: 6 additions & 6 deletions velox/expression/tests/TryExprTest.cpp
Expand Up @@ -82,14 +82,14 @@ TEST_F(TryExprTest, nestedTryChildErrors) {
// throw.
auto flatVector = makeFlatVector<StringView>(
5, [&](auto row) { return row % 2 == 0 ? "1" : "a"; });
auto result = evaluate<FlatVector<int64_t>>(
"try(coalesce(try(cast(c0 as integer)), 3))",
auto result = evaluate<FlatVector<int32_t>>(
"try(coalesce(try(cast(c0 as integer)), cast(3 as integer)))",
makeRowVector({flatVector}));

assertEqualVectors(
// Every other row throws an exception, which should get caught and
// coalesced to 3.
makeFlatVector<int64_t>({1, 3, 1, 3, 1}),
makeFlatVector<int32_t>({1, 3, 1, 3, 1}),
result);
}

Expand All @@ -104,12 +104,12 @@ TEST_F(TryExprTest, nestedTryParentErrors) {
size, [&](auto row) { return row % 3 == 0 ? "a" : "1"; });
auto col1 = makeFlatVector<StringView>(
size, [&](auto row) { return row % 3 == 1 ? "a" : "1"; });
auto result = evaluate<FlatVector<int64_t>>(
"try(cast(c1 as integer) + coalesce(try(cast(c0 as integer)), 3))",
auto result = evaluate<FlatVector<int32_t>>(
"try(cast(c1 as integer) + coalesce(try(cast(c0 as integer)), cast(3 as integer)))",
makeRowVector({col0, col1}));

assertEqualVectors(
makeFlatVector<int64_t>(
makeFlatVector<int32_t>(
size,
[&](auto row) {
if (row % 3 == 0) {
Expand Down
1 change: 0 additions & 1 deletion velox/functions/prestosql/CMakeLists.txt
Expand Up @@ -24,7 +24,6 @@ add_library(
ArrayIntersectExcept.cpp
ArrayMinMax.cpp
ArrayPosition.cpp
Coalesce.cpp
ElementAt.cpp
FilterFunctions.cpp
FromUnixTime.cpp
Expand Down
88 changes: 0 additions & 88 deletions velox/functions/prestosql/Coalesce.cpp

This file was deleted.

Expand Up @@ -23,7 +23,6 @@ void registerGeneralFunctions() {
VELOX_REGISTER_VECTOR_FUNCTION(udf_subscript, "subscript");
VELOX_REGISTER_VECTOR_FUNCTION(udf_transform, "transform");
VELOX_REGISTER_VECTOR_FUNCTION(udf_reduce, "reduce");
VELOX_REGISTER_VECTOR_FUNCTION(udf_coalesce, "coalesce");
VELOX_REGISTER_VECTOR_FUNCTION(udf_is_null, "is_null");
VELOX_REGISTER_VECTOR_FUNCTION(udf_in, "in");
VELOX_REGISTER_VECTOR_FUNCTION(udf_array_filter, "filter");
Expand Down
1 change: 0 additions & 1 deletion velox/functions/prestosql/tests/CMakeLists.txt
Expand Up @@ -34,7 +34,6 @@ add_executable(
BitwiseTest.cpp
CardinalityTest.cpp
CeilFloorTest.cpp
CoalesceTest.cpp
ComparisonsTest.cpp
DateTimeFunctionsTest.cpp
ElementAtTest.cpp
Expand Down
1 change: 0 additions & 1 deletion velox/functions/sparksql/Register.cpp
Expand Up @@ -55,7 +55,6 @@ static void workAroundRegistrationMacro(const std::string& prefix) {
VELOX_REGISTER_VECTOR_FUNCTION(udf_replace, prefix + "replace");
VELOX_REGISTER_VECTOR_FUNCTION(udf_upper, prefix + "upper");
// Logical.
VELOX_REGISTER_VECTOR_FUNCTION(udf_coalesce, prefix + "coalesce");
VELOX_REGISTER_VECTOR_FUNCTION(udf_is_null, prefix + "isnull");
VELOX_REGISTER_VECTOR_FUNCTION(udf_is_not_null, prefix + "isnotnull");
VELOX_REGISTER_VECTOR_FUNCTION(udf_not, prefix + "not");
Expand Down
3 changes: 2 additions & 1 deletion velox/parse/TypeResolver.cpp
Expand Up @@ -49,7 +49,8 @@ std::shared_ptr<const Type> resolveType(
return BOOLEAN();
}

if (expr->getFunctionName() == "try") {
if (expr->getFunctionName() == "try" ||
expr->getFunctionName() == "coalesce") {
VELOX_CHECK(!inputs.empty());
return inputs.front()->type();
}
Expand Down
2 changes: 1 addition & 1 deletion velox/vector/FlatVector.h
Expand Up @@ -202,7 +202,7 @@ class FlatVector final : public SimpleVector<T> {
T* mutableRawValues() {
if (!values_ || !values_->unique()) {
BufferPtr newValues =
AlignedBuffer::allocate<T>(BaseVector::length_, values_->pool());
AlignedBuffer::allocate<T>(BaseVector::length_, BaseVector::pool());
if (values_) {
// This codepath is not yet enabled for OPAQUE types (asMutable will
// fail below)
Expand Down

0 comments on commit 0e5431e

Please sign in to comment.