Skip to content

Commit

Permalink
[Bugfix] Fix wrong result when process 'is null' in condition expr in…
Browse files Browse the repository at this point in the history
… dictionary optimization (#8869)
  • Loading branch information
stdpain authored and kangkaisen committed Jul 23, 2022
1 parent 4a5216d commit 25d72b3
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 43 deletions.
2 changes: 1 addition & 1 deletion be/src/runtime/global_dicts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ Status DictOptimizeParser::_rewrite_expr_ctxs(std::vector<ExprContext*>* pexpr_c
DictOptimizeContext dict_ctx;
_check_could_apply_dict_optimize(expr_ctx, &dict_ctx);
if (dict_ctx.could_apply_dict_optimize) {
eval_expression(expr_ctx, &dict_ctx, slot_ids[i]);
RETURN_IF_ERROR(eval_expression(expr_ctx, &dict_ctx, slot_ids[i]));
auto* dict_ctx_handle = _free_pool.add(new DictOptimizeContext(std::move(dict_ctx)));
Expr* replaced_expr = _free_pool.add(new DictFuncExpr(*expr_ctx->root(), dict_ctx_handle));

Expand Down
30 changes: 23 additions & 7 deletions be/src/storage/column_operator_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

#pragma once

#include <cstdint>
#include <type_traits>

#include "column/nullable_column.h"
#include "storage/vectorized_column_predicate.h"

Expand All @@ -17,6 +20,19 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
ColumnOperatorPredicate(const TypeInfoPtr& type_info, ColumnId id, Args... args)
: ColumnPredicate(type_info, id), _predicate_operator(std::forward<Args>(args)...) {}

// Evaluates the wrapped predicate operator for the single row `index` of
// `column` and returns its result. No null handling is done here.
uint8_t evaluate_at(int index, const ColumnType* column) const {
    const uint8_t matched = _predicate_operator.eval_at(column, index);
    return matched;
}

// Null-aware evaluation of the row `index`.
// When SpecColumnOperator::skip_null is true, a row flagged in `null_data`
// never matches; otherwise the null flag is ignored and the operator decides
// on its own (it sees whatever value is stored at that row).
uint8_t evaluate_at_nullable(int index, const uint8_t* null_data, const ColumnType* column) const {
    if constexpr (!SpecColumnOperator::skip_null) {
        return _predicate_operator.eval_at(column, index);
    } else {
        return !null_data[index] && _predicate_operator.eval_at(column, index);
    }
}

Status evaluate(const Column* column, uint8_t* sel, uint16_t from, uint16_t to) const override {
// get raw column
const ColumnType* lowcard_column;
Expand All @@ -29,13 +45,13 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
}
if (!column->has_null()) {
for (size_t i = from; i < to; i++) {
sel[i] = _predicate_operator.eval_at(lowcard_column, i);
sel[i] = evaluate_at(i, lowcard_column);
}
} else {
/* must use uint8_t* to make vectorized effect */
const uint8_t* null_data = down_cast<const NullableColumn*>(column)->immutable_null_column_data().data();
for (size_t i = from; i < to; i++) {
sel[i] = !null_data[i] && _predicate_operator.eval_at(lowcard_column, i);
sel[i] = evaluate_at_nullable(i, null_data, lowcard_column);
}
}
return Status::OK();
Expand All @@ -53,13 +69,13 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
}
if (!column->has_null()) {
for (size_t i = from; i < to; i++) {
sel[i] = (sel[i] && _predicate_operator.eval_at(lowcard_column, i));
sel[i] = (sel[i] && evaluate_at(i, lowcard_column));
}
} else {
/* must use uint8_t* to make vectorized effect */
const uint8_t* null_data = down_cast<const NullableColumn*>(column)->immutable_null_column_data().data();
for (size_t i = from; i < to; i++) {
sel[i] = (sel[i] && !null_data[i] && _predicate_operator.eval_at(lowcard_column, i));
sel[i] = (sel[i] && evaluate_at_nullable(i, null_data, lowcard_column));
}
}
return Status::OK();
Expand All @@ -83,7 +99,7 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
/* must use uint8_t* to make vectorized effect */
const uint8_t* null_data = down_cast<const NullableColumn*>(column)->immutable_null_column_data().data();
for (size_t i = from; i < to; i++) {
sel[i] = (sel[i] || (!null_data[i] && _predicate_operator.eval_at(lowcard_column, i)));
sel[i] = (sel[i] || evaluate_at_nullable(i, null_data, lowcard_column));
}
}
return Status::OK();
Expand All @@ -105,15 +121,15 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
for (uint16_t i = 0; i < sel_size; ++i) {
uint16_t data_idx = sel[i];
sel[new_size] = data_idx;
new_size += _predicate_operator.eval_at(lowcard_column, data_idx);
new_size += evaluate_at(data_idx, lowcard_column);
}
} else {
/* must use uint8_t* to make vectorized effect */
const uint8_t* null_data = down_cast<const NullableColumn*>(column)->immutable_null_column_data().data();
for (uint16_t i = 0; i < sel_size; ++i) {
uint16_t data_idx = sel[i];
sel[new_size] = data_idx;
new_size += !null_data[data_idx] && _predicate_operator.eval_at(lowcard_column, data_idx);
new_size += evaluate_at_nullable(data_idx, null_data, lowcard_column);
}
}
return new_size;
Expand Down
8 changes: 6 additions & 2 deletions be/src/storage/column_predicate_dict_conjuct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@ namespace starrocks::vectorized {

// DictConjuctPredicateOperator for global dictionary optimization.
// It converts all predicates into code mappings.
// NULL input is handled as dictionary code 0
// eg: where key = 'SR' will convert to
// [0] "SR" -> true
// [1] "RK" -> false
// [0] NULL -> false
// [1] "SR" -> true
// [2] "RK" -> false
//

template <FieldType field_type>
class DictConjuctPredicateOperator {
public:
static constexpr bool skip_null = false;
DictConjuctPredicateOperator(std::vector<uint8_t> code_mapping) : _code_mapping(std::move(code_mapping)) {}

uint8_t eval_at(const LowCardDictColumn* lowcard_column, int idx) const {
Expand Down
2 changes: 1 addition & 1 deletion be/src/storage/rowset/column_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ class ColumnIterator {
// dictionary codes from the column |codes|.
// |codes| must be of type `FixedLengthColumn<int32_t>` or `NullableColumn<FixedLengthColumn<int32_t>>`
// and assume no `null` value in |codes|.
Status decode_dict_codes(const vectorized::Column& codes, vectorized::Column* words);
virtual Status decode_dict_codes(const vectorized::Column& codes, vectorized::Column* words);

// given a list of ordinals, fetch corresponding values.
// |ordinals| must be ascending sorted.
Expand Down
41 changes: 41 additions & 0 deletions be/src/storage/rowset/dictcode_column_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,51 @@
#include "storage/rowset/dictcode_column_iterator.h"

#include "column/column_helper.h"
#include "gutil/casts.h"
#include "storage/rowset/scalar_column_iterator.h"

namespace starrocks {

// Converts the local dictionary codes held in |codes| into global dictionary
// codes and writes them into |words|.
//
// |codes|: its data column is read as an Int32Column of local codes.
// |words|: its data column is treated as a LowCardDictColumn; it is resized to
//          the number of input codes and filled with _local_to_global[code].
//          If |words| is nullable, its null column is resized to match, and
//          every row that is NULL in |codes| is assigned global code 0.
//
// Always returns Status::OK().
Status GlobalDictCodeColumnIterator::decode_dict_codes(const vectorized::Column& codes, vectorized::Column* words) {
    const auto& code_data =
            down_cast<const vectorized::Int32Column*>(vectorized::ColumnHelper::get_data_column(&codes))->get_data();
    const size_t size = code_data.size();

    LowCardDictColumn::Container& res_data =
            down_cast<LowCardDictColumn*>(vectorized::ColumnHelper::get_data_column(words))->get_data();
    const bool output_nullable = words->is_nullable();

    res_data.resize(size);
#ifndef NDEBUG
    // Debug-only sanity check: codes must fit the gather table, and a negative
    // code is only tolerated when the output can represent NULL.
    for (size_t i = 0; i < size; ++i) {
        DCHECK(code_data[i] <= vectorized::DICT_DECODE_MAX_SIZE);
        if (code_data[i] < 0) {
            DCHECK(output_nullable);
        }
    }
#endif
    {
        using namespace vectorized;
        // res_data[i] = _local_to_global[code_data[i]];
        SIMDGather::gather(res_data.data(), _local_to_global, code_data.data(), DICT_DECODE_MAX_SIZE, size);
    }

    if (output_nullable) {
        // reserve null data
        down_cast<vectorized::NullableColumn*>(words)->null_column_data().resize(size);
        if (codes.has_null()) {
            // Only view |codes| as a NullableColumn once it reports nulls, so a
            // non-nullable |codes| paired with a nullable |words| is never
            // down-cast to the wrong type.
            // NOTE(review): assumes has_null() is false for non-nullable columns -- confirm.
            const auto& null_data = down_cast<const vectorized::NullableColumn&>(codes).immutable_null_column_data();
            // assign code 0 if input data is null
            for (size_t i = 0; i < size; ++i) {
                res_data[i] = null_data[i] == 0 ? res_data[i] : 0;
            }
        }
    }

    return Status::OK();
}

Status GlobalDictCodeColumnIterator::build_code_convert_map(ScalarColumnIterator* file_column_iter,
GlobalDictMap* global_dict,
std::vector<int16_t>* code_convert_map) {
Expand Down
35 changes: 3 additions & 32 deletions be/src/storage/rowset/dictcode_column_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,39 +125,10 @@ class GlobalDictCodeColumnIterator final : public ColumnIterator {
return Status::NotSupported("GlobalDictCodeColumnIterator does not support next_dict_codes");
}

Status decode_dict_codes(const int32_t* codes, size_t size, vectorized::Column* words) override {
LowCardDictColumn::Container* container = nullptr;
bool output_nullable = words->is_nullable();

if (output_nullable) {
vectorized::ColumnPtr& data_column = down_cast<vectorized::NullableColumn*>(words)->data_column();
container = &down_cast<LowCardDictColumn*>(data_column.get())->get_data();
} else {
container = &down_cast<LowCardDictColumn*>(words)->get_data();
}

auto& res_data = *container;
res_data.resize(size);
#ifndef NDEBUG
for (size_t i = 0; i < size; ++i) {
DCHECK(codes[i] <= vectorized::DICT_DECODE_MAX_SIZE);
if (codes[i] < 0) {
DCHECK(output_nullable);
}
}
#endif
{
using namespace vectorized;
// res_data[i] = _local_to_global[codes[i]];
SIMDGather::gather(res_data.data(), _local_to_global, codes, DICT_DECODE_MAX_SIZE, size);
}
Status decode_dict_codes(const vectorized::Column& codes, vectorized::Column* words) override;

if (output_nullable) {
auto& null_data = down_cast<vectorized::NullableColumn*>(words)->null_column_data();
null_data.resize(size);
}

return Status::OK();
// Raw-pointer overload: not supported by this iterator. It unconditionally
// returns Status::NotSupported and does not touch any of its arguments.
Status decode_dict_codes(const int32_t* codes, size_t size, vectorized::Column* words) override {
return Status::NotSupported("unsupport decode_dict_codes in GlobalDictCodeColumnIterator");
}

Status get_row_ranges_by_zone_map(const std::vector<const vectorized::ColumnPredicate*>& predicates,
Expand Down

0 comments on commit 25d72b3

Please sign in to comment.