Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bugfix] Fix wrong result when process 'is null' in condition expr in dictionary optimization #8869

Merged
merged 4 commits into from
Jul 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion be/src/runtime/global_dict/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ Status DictOptimizeParser::_rewrite_expr_ctxs(std::vector<ExprContext*>* pexpr_c
for (int i = 0; i < expr_ctxs.size(); ++i) {
auto& expr_ctx = expr_ctxs[i];
auto expr = expr_ctx->root();
rewrite_expr(expr_ctx, expr, slot_ids[i]);
RETURN_IF_ERROR(rewrite_expr(expr_ctx, expr, slot_ids[i]));
}
return Status::OK();
}
Expand Down
30 changes: 23 additions & 7 deletions be/src/storage/column_operator_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

#pragma once

#include <cstdint>
#include <type_traits>

#include "column/nullable_column.h"
#include "storage/vectorized_column_predicate.h"

Expand All @@ -17,6 +20,19 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
ColumnOperatorPredicate(const TypeInfoPtr& type_info, ColumnId id, Args... args)
: ColumnPredicate(type_info, id), _predicate_operator(std::forward<Args>(args)...) {}

// Applies the wrapped predicate operator to row |index| of |column|.
// No null handling is performed here.
uint8_t evaluate_at(int index, const ColumnType* column) const {
    const uint8_t matched = _predicate_operator.eval_at(column, index);
    return matched;
}

// Null-aware variant of evaluate_at.
// |null_data| is indexed by row; a non-zero entry marks the row as null.
// When SpecColumnOperator::skip_null is true, a null row yields 0 and the
// operator is not invoked for it; otherwise the operator is evaluated for
// every row and |null_data| is ignored.
uint8_t evaluate_at_nullable(int index, const uint8_t* null_data, const ColumnType* column) const {
    if constexpr (!SpecColumnOperator::skip_null) {
        return _predicate_operator.eval_at(column, index);
    } else {
        const bool row_is_null = null_data[index] != 0;
        return !row_is_null && _predicate_operator.eval_at(column, index);
    }
}

Status evaluate(const Column* column, uint8_t* sel, uint16_t from, uint16_t to) const override {
// get raw column
const ColumnType* lowcard_column;
Expand All @@ -29,13 +45,13 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
}
if (!column->has_null()) {
for (size_t i = from; i < to; i++) {
sel[i] = _predicate_operator.eval_at(lowcard_column, i);
sel[i] = evaluate_at(i, lowcard_column);
}
} else {
/* must use uint8_t* to make vectorized effect */
const uint8_t* null_data = down_cast<const NullableColumn*>(column)->immutable_null_column_data().data();
for (size_t i = from; i < to; i++) {
sel[i] = !null_data[i] && _predicate_operator.eval_at(lowcard_column, i);
sel[i] = evaluate_at_nullable(i, null_data, lowcard_column);
}
}
return Status::OK();
Expand All @@ -53,13 +69,13 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
}
if (!column->has_null()) {
for (size_t i = from; i < to; i++) {
sel[i] = (sel[i] && _predicate_operator.eval_at(lowcard_column, i));
sel[i] = (sel[i] && evaluate_at(i, lowcard_column));
}
} else {
/* must use uint8_t* to make vectorized effect */
const uint8_t* null_data = down_cast<const NullableColumn*>(column)->immutable_null_column_data().data();
for (size_t i = from; i < to; i++) {
sel[i] = (sel[i] && !null_data[i] && _predicate_operator.eval_at(lowcard_column, i));
sel[i] = (sel[i] && evaluate_at_nullable(i, null_data, lowcard_column));
}
}
return Status::OK();
Expand All @@ -83,7 +99,7 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
/* must use uint8_t* to make vectorized effect */
const uint8_t* null_data = down_cast<const NullableColumn*>(column)->immutable_null_column_data().data();
for (size_t i = from; i < to; i++) {
sel[i] = (sel[i] || (!null_data[i] && _predicate_operator.eval_at(lowcard_column, i)));
sel[i] = (sel[i] || evaluate_at_nullable(i, null_data, lowcard_column));
}
}
return Status::OK();
Expand All @@ -105,15 +121,15 @@ class ColumnOperatorPredicate final : public ColumnPredicate {
for (uint16_t i = 0; i < sel_size; ++i) {
uint16_t data_idx = sel[i];
sel[new_size] = data_idx;
new_size += _predicate_operator.eval_at(lowcard_column, data_idx);
new_size += evaluate_at(data_idx, lowcard_column);
}
} else {
/* must use uint8_t* to make vectorized effect */
const uint8_t* null_data = down_cast<const NullableColumn*>(column)->immutable_null_column_data().data();
for (uint16_t i = 0; i < sel_size; ++i) {
uint16_t data_idx = sel[i];
sel[new_size] = data_idx;
new_size += !null_data[data_idx] && _predicate_operator.eval_at(lowcard_column, data_idx);
new_size += evaluate_at_nullable(data_idx, null_data, lowcard_column);
}
}
return new_size;
Expand Down
8 changes: 6 additions & 2 deletions be/src/storage/column_predicate_dict_conjuct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@ namespace starrocks::vectorized {

// DictConjuctPredicateOperator for global dictionary optimization.
// It converts all predicates into code mappings.
// A NULL input is mapped to code 0.
// e.g. `where key = 'SR'` is converted to
// [0] "SR" -> true
// [1] "RK" -> false
// [0] NULL -> false
// [1] "SR" -> true
// [2] "RK" -> false
//

template <FieldType field_type>
class DictConjuctPredicateOperator {
public:
static constexpr bool skip_null = false;
DictConjuctPredicateOperator(std::vector<uint8_t> code_mapping) : _code_mapping(std::move(code_mapping)) {}

uint8_t eval_at(const LowCardDictColumn* lowcard_column, int idx) const {
Expand Down
2 changes: 1 addition & 1 deletion be/src/storage/rowset/column_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ class ColumnIterator {
// dictionary codes from the column |codes|.
// |codes| must be of type `FixedLengthColumn<int32_t>` or `NullableColumn<FixedLengthColumn<int32_t>>`
// and is assumed to contain no `null` value.
Status decode_dict_codes(const vectorized::Column& codes, vectorized::Column* words);
virtual Status decode_dict_codes(const vectorized::Column& codes, vectorized::Column* words);

// given a list of ordinals, fetch corresponding values.
// |ordinals| must be ascending sorted.
Expand Down
41 changes: 41 additions & 0 deletions be/src/storage/rowset/dictcode_column_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,51 @@
#include "storage/rowset/dictcode_column_iterator.h"

#include "column/column_helper.h"
#include "gutil/casts.h"
#include "storage/rowset/scalar_column_iterator.h"

namespace starrocks {

// Rewrites the dictionary codes held in |codes| through the |_local_to_global|
// table and stores the results in |words|.
//
// |codes|: an Int32 column, possibly wrapped in a NullableColumn (the data column
//          is obtained through ColumnHelper::get_data_column).
// |words|: a LowCardDictColumn, possibly wrapped in a NullableColumn. Its data
//          container is resized to the number of input rows and overwritten.
//
// When |words| is nullable its null column is resized to the same length, but the
// null flags themselves are not copied here. Rows that are null in |codes| get
// code 0 in the output data.
//
// Always returns Status::OK().
Status GlobalDictCodeColumnIterator::decode_dict_codes(const vectorized::Column& codes, vectorized::Column* words) {
    const auto& code_data =
            down_cast<const vectorized::Int32Column*>(vectorized::ColumnHelper::get_data_column(&codes))->get_data();
    const size_t size = code_data.size();

    auto& res_data = down_cast<LowCardDictColumn*>(vectorized::ColumnHelper::get_data_column(words))->get_data();
    const bool output_nullable = words->is_nullable();

    res_data.resize(size);
#ifndef NDEBUG
    // Debug-only sanity checks: codes must not exceed the gather table bound, and
    // negative codes are only tolerated when the output can represent nulls.
    for (size_t i = 0; i < size; ++i) {
        DCHECK(code_data[i] <= vectorized::DICT_DECODE_MAX_SIZE);
        if (code_data[i] < 0) {
            DCHECK(output_nullable);
        }
    }
#endif
    {
        using namespace vectorized;
        // res_data[i] = _local_to_global[code_data[i]];
        SIMDGather::gather(res_data.data(), _local_to_global, code_data.data(), DICT_DECODE_MAX_SIZE, size);
    }

    if (output_nullable) {
        // reserve null data
        down_cast<vectorized::NullableColumn*>(words)->null_column_data().resize(size);
        if (codes.has_null()) {
            // Only view |codes| as a NullableColumn once it reports nulls. Casting
            // before this check would be invalid if a non-nullable |codes| were
            // paired with a nullable |words|.
            // NOTE(review): presumably has_null() is false for non-nullable columns - confirm.
            const auto& null_data = down_cast<const vectorized::NullableColumn&>(codes).immutable_null_column_data();
            // assign code 0 if input data is null
            for (size_t i = 0; i < size; ++i) {
                res_data[i] = null_data[i] == 0 ? res_data[i] : 0;
            }
        }
    }

    return Status::OK();
}

Status GlobalDictCodeColumnIterator::build_code_convert_map(ScalarColumnIterator* file_column_iter,
GlobalDictMap* global_dict,
std::vector<int16_t>* code_convert_map) {
Expand Down
35 changes: 3 additions & 32 deletions be/src/storage/rowset/dictcode_column_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,39 +128,10 @@ class GlobalDictCodeColumnIterator final : public ColumnIterator {
return Status::NotSupported("GlobalDictCodeColumnIterator does not support next_dict_codes");
}

Status decode_dict_codes(const int32_t* codes, size_t size, vectorized::Column* words) override {
LowCardDictColumn::Container* container = nullptr;
bool output_nullable = words->is_nullable();

if (output_nullable) {
vectorized::ColumnPtr& data_column = down_cast<vectorized::NullableColumn*>(words)->data_column();
container = &down_cast<LowCardDictColumn*>(data_column.get())->get_data();
} else {
container = &down_cast<LowCardDictColumn*>(words)->get_data();
}

auto& res_data = *container;
res_data.resize(size);
#ifndef NDEBUG
for (size_t i = 0; i < size; ++i) {
DCHECK(codes[i] <= vectorized::DICT_DECODE_MAX_SIZE);
if (codes[i] < 0) {
DCHECK(output_nullable);
}
}
#endif
{
using namespace vectorized;
// res_data[i] = _local_to_global[codes[i]];
SIMDGather::gather(res_data.data(), _local_to_global, codes, DICT_DECODE_MAX_SIZE, size);
}
Status decode_dict_codes(const vectorized::Column& codes, vectorized::Column* words) override;

if (output_nullable) {
auto& null_data = down_cast<vectorized::NullableColumn*>(words)->null_column_data();
null_data.resize(size);
}

return Status::OK();
// Raw code-array overload: not implemented by this iterator. It ignores its
// arguments and always returns Status::NotSupported.
Status decode_dict_codes(const int32_t* codes, size_t size, vectorized::Column* words) override {
return Status::NotSupported("unsupport decode_dict_codes in GlobalDictCodeColumnIterator");
}

Status get_row_ranges_by_zone_map(const std::vector<const vectorized::ColumnPredicate*>& predicates,
Expand Down