From 87163ff66bfedaf5065df26b86edb04c156ec66b Mon Sep 17 00:00:00 2001 From: liuyehcf <1559500551@qq.com> Date: Fri, 10 Feb 2023 13:00:41 +0800 Subject: [PATCH] [BugFix] Adjust NULL_TYPE hack processing(2) (#17483) Signed-off-by: liuyehcf <1559500551@qq.com> --- be/src/column/array_column.cpp | 1 + be/src/column/array_column.h | 2 +- be/src/column/map_column.cpp | 2 + be/src/column/map_column.h | 4 +- be/src/column/nullable_column.h | 12 +- be/src/exprs/cast_nested.cpp | 3 + be/src/exprs/split.cpp | 3 +- be/src/storage/chunk_helper.cpp | 22 +- be/test/column/array_column_test.cpp | 603 +++++++++--------- be/test/column/map_column_test.cpp | 96 +-- be/test/exprs/agg/aggregate_test.cpp | 6 +- be/test/exprs/array_element_expr_test.cpp | 9 +- be/test/exprs/bitmap_functions_test.cpp | 6 +- be/test/storage/column_aggregator_test.cpp | 10 +- .../rowset/column_reader_writer_test.cpp | 18 +- be/test/storage/rowset/map_column_rw_test.cpp | 34 +- .../com/starrocks/analysis/CloneExpr.java | 6 + .../java/com/starrocks/analysis/Expr.java | 4 +- .../sql/plan/ScalarOperatorToExpr.java | 71 ++- .../com/starrocks/sql/plan/ArrayTypeTest.java | 16 + 20 files changed, 435 insertions(+), 493 deletions(-) diff --git a/be/src/column/array_column.cpp b/be/src/column/array_column.cpp index a011aac11fe5c..2fca52ddf4ea2 100644 --- a/be/src/column/array_column.cpp +++ b/be/src/column/array_column.cpp @@ -35,6 +35,7 @@ void ArrayColumn::check_or_die() const { ArrayColumn::ArrayColumn(ColumnPtr elements, UInt32Column::Ptr offsets) : _elements(std::move(elements)), _offsets(std::move(offsets)) { + DCHECK(_elements->is_nullable()); if (_offsets->empty()) { _offsets->append(0); } diff --git a/be/src/column/array_column.h b/be/src/column/array_column.h index c42b116348374..9f679bf332c2f 100644 --- a/be/src/column/array_column.h +++ b/be/src/column/array_column.h @@ -189,7 +189,7 @@ class ArrayColumn final : public ColumnFactory { Status unfold_const_children(const starrocks::TypeDescriptor& type) override; private: - // _elements must be NullableColumn + // Elements must be NullableColumn to facilitate handling nested types. ColumnPtr _elements; // Offsets column will store the start position of every array element. // Offsets store more one data to indicate the end position. diff --git a/be/src/column/map_column.cpp b/be/src/column/map_column.cpp index 2660c8bce6039..5235ce27829e1 100644 --- a/be/src/column/map_column.cpp +++ b/be/src/column/map_column.cpp @@ -39,6 +39,8 @@ void MapColumn::check_or_die() const { MapColumn::MapColumn(ColumnPtr keys, ColumnPtr values, UInt32Column::Ptr offsets) : _keys(std::move(keys)), _values(std::move(values)), _offsets(std::move(offsets)) { + DCHECK(_keys->is_nullable()); + DCHECK(_values->is_nullable()); if (_offsets->empty()) { _offsets->append(0); } diff --git a/be/src/column/map_column.h b/be/src/column/map_column.h index 03b7cb1d71d02..8a6e70d6a7772 100644 --- a/be/src/column/map_column.h +++ b/be/src/column/map_column.h @@ -187,9 +187,9 @@ class MapColumn final : public ColumnFactory { Status unfold_const_children(const starrocks::TypeDescriptor& type) override; private: - // keys must be NullableColumn + // Keys must be NullableColumn to facilitate handling nested types. ColumnPtr _keys; - // values must be NullableColumn + // Values must be NullableColumn to facilitate handling nested types. ColumnPtr _values; // Offsets column will store the start position of every map element. // Offsets store more one data to indicate the end position. diff --git a/be/src/column/nullable_column.h b/be/src/column/nullable_column.h index a52738569f92a..21dbea9a2c14a 100644 --- a/be/src/column/nullable_column.h +++ b/be/src/column/nullable_column.h @@ -33,6 +33,14 @@ class NullableColumn final : public ColumnFactory { friend class ColumnFactory; public: + inline static ColumnPtr wrap_if_necessary(ColumnPtr column) { + if (column->is_nullable()) { + return column; + } + auto null = NullColumn::create(column->size(), 0); + return NullableColumn::create(std::move(column), std::move(null)); + } + NullableColumn(MutableColumnPtr&& data_column, MutableColumnPtr&& null_column); NullableColumn(ColumnPtr data_column, NullColumnPtr null_column); @@ -272,7 +280,7 @@ class NullableColumn final : public ColumnFactory { } std::string debug_item(size_t idx) const override { - DCHECK(_null_column->size() == _data_column->size()); + DCHECK_EQ(_null_column->size(), _data_column->size()); std::stringstream ss; if (_null_column->get_data()[idx]) { ss << "NULL"; @@ -283,7 +291,7 @@ class NullableColumn final : public ColumnFactory { } std::string debug_string() const override { - DCHECK(_null_column->size() == _data_column->size()); + DCHECK_EQ(_null_column->size(), _data_column->size()); std::stringstream ss; ss << "["; size_t size = _data_column->size(); diff --git a/be/src/exprs/cast_nested.cpp b/be/src/exprs/cast_nested.cpp index 7b50f20a72f7c..e879159ad97c3 100644 --- a/be/src/exprs/cast_nested.cpp +++ b/be/src/exprs/cast_nested.cpp @@ -39,6 +39,7 @@ StatusOr CastMapExpr::evaluate_checked(ExprContext* context, Chunk* p } else { casted_key_column = map_column->keys_column()->clone_shared(); } + casted_key_column = NullableColumn::wrap_if_necessary(casted_key_column); // cast value column if (_value_cast != nullptr) { @@ -48,6 +49,7 @@ StatusOr CastMapExpr::evaluate_checked(ExprContext* context, Chunk* p } else { casted_value_column = map_column->values_column()->clone_shared(); } + casted_value_column = NullableColumn::wrap_if_necessary(casted_value_column); auto casted_map = MapColumn::create(std::move(casted_key_column), std::move(casted_value_column), ColumnHelper::as_column(map_column->offsets_column()->clone_shared())); @@ -109,6 +111,7 @@ StatusOr CastArrayExpr::evaluate_checked(ExprContext* context, Chunk* } else { casted_element_column = array_column->elements_column()->clone_shared(); } + casted_element_column = NullableColumn::wrap_if_necessary(casted_element_column); auto casted_array = ArrayColumn::create(std::move(casted_element_column), diff --git a/be/src/exprs/split.cpp b/be/src/exprs/split.cpp index d37de0de524cc..6e11a40d33de3 100644 --- a/be/src/exprs/split.cpp +++ b/be/src/exprs/split.cpp @@ -176,7 +176,8 @@ StatusOr StringFunctions::split(FunctionContext* context, const starr } else { array_binary_column->reserve(row_nums * 5, haystack_columns->get_bytes().size() * sizeof(uint8_t)); - auto result_array = ArrayColumn::create(BinaryColumn::create(), UInt32Column::create()); + auto result_array = ArrayColumn::create(NullableColumn::create(BinaryColumn::create(), NullColumn::create()), + UInt32Column::create()); NullColumnPtr null_array = NullColumn::create(); for (int row = 0; row < row_nums; ++row) { array_offsets->append(offset); diff --git a/be/src/storage/chunk_helper.cpp b/be/src/storage/chunk_helper.cpp index 2e91475b5acdd..f57be54b90ff6 100644 --- a/be/src/storage/chunk_helper.cpp +++ b/be/src/storage/chunk_helper.cpp @@ -217,23 +217,23 @@ template struct ColumnPtrBuilder { template ColumnPtr operator()(size_t chunk_size, const Field& field, int precision, int scale) { - auto nullable = [&](ColumnPtr c) -> ColumnPtr { + auto NullableIfNeed = [&](ColumnPtr c) -> ColumnPtr { return field.is_nullable() ? NullableColumn::create(std::move(c), get_column_ptr(chunk_size)) : c; }; if constexpr (ftype == TYPE_ARRAY) { - auto elements = field.sub_field(0).create_column(); + auto elements = NullableColumn::wrap_if_necessary(field.sub_field(0).create_column()); auto offsets = get_column_ptr(chunk_size); auto array = ArrayColumn::create(std::move(elements), offsets); - return nullable(array); + return NullableIfNeed(array); } else if constexpr (ftype == TYPE_MAP) { - auto keys = field.sub_field(0).create_column(); - auto values = field.sub_field(1).create_column(); + auto keys = NullableColumn::wrap_if_necessary(field.sub_field(0).create_column()); + auto values = NullableColumn::wrap_if_necessary(field.sub_field(1).create_column()); auto offsets = get_column_ptr(chunk_size); auto map = MapColumn::create(std::move(keys), std::move(values), offsets); - return nullable(map); + return NullableIfNeed(map); } else if constexpr (ftype == TYPE_STRUCT) { std::vector names; std::vector fields; @@ -242,17 +242,17 @@ struct ColumnPtrBuilder { fields.template emplace_back(sub_field.create_column()); } auto struct_column = StructColumn::create(std::move(fields), std::move(names)); - return nullable(struct_column); + return NullableIfNeed(struct_column); } else { switch (ftype) { case TYPE_DECIMAL32: - return nullable(get_decimal_column_ptr(precision, scale, chunk_size)); + return NullableIfNeed(get_decimal_column_ptr(precision, scale, chunk_size)); case TYPE_DECIMAL64: - return nullable(get_decimal_column_ptr(precision, scale, chunk_size)); + return NullableIfNeed(get_decimal_column_ptr(precision, scale, chunk_size)); case TYPE_DECIMAL128: - return nullable(get_decimal_column_ptr(precision, scale, chunk_size)); + return NullableIfNeed(get_decimal_column_ptr(precision, scale, chunk_size)); default: { - return nullable(get_column_ptr::ColumnType, force>(chunk_size)); + return NullableIfNeed(get_column_ptr::ColumnType, force>(chunk_size)); } } } diff --git a/be/test/column/array_column_test.cpp b/be/test/column/array_column_test.cpp index 2ed7c0f75968f..c7a77dc6bf825 100644 --- a/be/test/column/array_column_test.cpp +++ b/be/test/column/array_column_test.cpp @@ -30,7 +30,7 @@ namespace starrocks { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_create) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); ASSERT_TRUE(column->is_array()); ASSERT_FALSE(column->is_nullable()); @@ -41,11 +41,11 @@ PARALLEL_TEST(ArrayColumnTest, test_create) { PARALLEL_TEST(ArrayColumnTest, test_array_column_update_if_overflow) { // normal auto offsets = UInt32Column::create(); - auto elements = BinaryColumn::create(); + auto elements = NullableColumn::create(BinaryColumn::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); - elements->append("1"); - elements->append("2"); + elements->append_datum("1"); + elements->append_datum("2"); offsets->append(2); auto ret = column->upgrade_if_overflow(); ASSERT_TRUE(ret.ok()); @@ -54,36 +54,14 @@ PARALLEL_TEST(ArrayColumnTest, test_array_column_update_if_overflow) { auto array = column->get(0).get_array(); ASSERT_EQ(array[0].get_slice(), Slice("1")); ASSERT_EQ(array[1].get_slice(), Slice("2")); - -#ifdef NDEBUG - /* - // the test case case will use a lot of memory, so temp comment it - // upgrade - offsets = UInt32Column::create(); - elements = BinaryColumn::create(); - column = ArrayColumn::create(elements, offsets); - size_t item_count = 1<<30; - for (size_t i = 0; i < item_count; i++) { - elements->append(std::to_string(i)); - } - offsets->resize(item_count + 1); - for (size_t i = 0; i < item_count; i++) { - offsets->get_data()[i + 1] = i + 1; - } - ret = column->upgrade_if_overflow(); - ASSERT_TRUE(ret.ok()); - ASSERT_TRUE(ret.value() == nullptr); - ASSERT_TRUE(column->elements_column()->is_large_binary()); - */ -#endif } // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_array_column_downgrade) { auto offsets = UInt32Column::create(); - auto elements = BinaryColumn::create(); - elements->append("1"); - elements->append("2"); + auto elements = NullableColumn::create(BinaryColumn::create(), NullColumn::create()); + elements->append_datum("1"); + elements->append_datum("2"); offsets->append(2); auto column = ArrayColumn::create(elements, offsets); ASSERT_FALSE(column->has_large_column()); @@ -92,10 +70,10 @@ PARALLEL_TEST(ArrayColumnTest, test_array_column_downgrade) { ASSERT_TRUE(ret.value() == nullptr); offsets = UInt32Column::create(); - auto large_elements = LargeBinaryColumn::create(); + auto large_elements = NullableColumn::create(LargeBinaryColumn::create(), NullColumn::create()); column = ArrayColumn::create(large_elements, offsets); for (size_t i = 0; i < 10; i++) { - large_elements->append(std::to_string(i)); + large_elements->append_datum(Slice(std::to_string(i))); offsets->append(i + 1); } ASSERT_TRUE(column->has_large_column()); @@ -112,18 +90,18 @@ PARALLEL_TEST(ArrayColumnTest, test_array_column_downgrade) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_get_elements) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); - offsets->append(3); - - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); + offsets->append_datum(3); + + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); ASSERT_EQ("[1,2,3]", column->debug_item(0)); @@ -133,33 +111,33 @@ PARALLEL_TEST(ArrayColumnTest, test_get_elements) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_byte_size) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); - offsets->append(3); - - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); + offsets->append_datum(3); + + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); ASSERT_EQ(2, column->size()); - // elements has six element, with 24 bytes. + // elements has six element, with 24 + 6(null) bytes. // offsets has three element, with 12 bytes. - ASSERT_EQ(36, column->byte_size()); - // elements 0 with 12 bytes. + ASSERT_EQ(42, column->byte_size()); + // elements 0 with 12 + 3(null) bytes. // offset 0 with 4 bytes. - ASSERT_EQ(16, column->byte_size(0, 1)); - ASSERT_EQ(16, column->byte_size(0)); + ASSERT_EQ(19, column->byte_size(0, 1)); + ASSERT_EQ(19, column->byte_size(0)); - // elements 1 with 12 bytes. + // elements 1 with 12 + 3(null) bytes. // offset 1 with 4 bytes. - ASSERT_EQ(16, column->byte_size(1, 1)); + ASSERT_EQ(19, column->byte_size(1, 1)); } // NOLINTNEXTLINE @@ -326,7 +304,8 @@ PARALLEL_TEST(ArrayColumnTest, test_filter) { // ARRAY> { const int N = 100; - auto elements = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); + auto elements = ArrayColumn::create(NullableColumn::create(Int32Column::create(), NullColumn::create()), + UInt32Column::create()); auto nullable_elements = NullableColumn::create(std::move(elements), NullColumn::create()); auto offsets = UInt32Column ::create(); auto column = ArrayColumn::create(std::move(nullable_elements), std::move(offsets)); @@ -413,24 +392,24 @@ PARALLEL_TEST(ArrayColumnTest, test_filter) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_append_array) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); - offsets->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); + offsets->append_datum(3); - elements->append(4); - elements->append(5); - elements->append(6); - offsets->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); + offsets->append_datum(6); // append [7, 8, 9] - elements->append(7); - elements->append(8); - elements->append(9); + elements->append_datum(7); + elements->append_datum(8); + elements->append_datum(9); offsets->append(9); ASSERT_EQ("[7,8,9]", column->debug_item(2)); @@ -439,7 +418,7 @@ PARALLEL_TEST(ArrayColumnTest, test_append_array) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_append_nulls) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); auto null_column = NullColumn::create(); auto nullable_column = NullableColumn::create(column, null_column); @@ -448,15 +427,15 @@ PARALLEL_TEST(ArrayColumnTest, test_append_nulls) { // insert [1, 2, 3], [4, 5, 6] null_column->append(0); - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); null_column->append(0); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); ASSERT_EQ(3, nullable_column->size()); @@ -467,18 +446,18 @@ PARALLEL_TEST(ArrayColumnTest, test_append_nulls) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_append_defaults) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); - offsets->append(3); - - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); + offsets->append_datum(3); + + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); // append_default @@ -492,33 +471,33 @@ PARALLEL_TEST(ArrayColumnTest, test_append_defaults) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_compare_at) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); auto offsets_2 = UInt32Column::create(); - auto elements_2 = Int32Column::create(); + auto elements_2 = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column_2 = ArrayColumn::create(elements_2, offsets_2); // insert [4, 5, 6], [7, 8, 9] - elements_2->append(4); - elements_2->append(5); - elements_2->append(6); + elements_2->append_datum(4); + elements_2->append_datum(5); + elements_2->append_datum(6); offsets_2->append(3); - elements_2->append(7); - elements_2->append(8); - elements_2->append(9); + elements_2->append_datum(7); + elements_2->append_datum(8); + elements_2->append_datum(9); offsets_2->append(6); ASSERT_EQ(2, column->size()); @@ -537,11 +516,8 @@ PARALLEL_TEST(ArrayColumnTest, equals) { // lhs: [1,2,3], [4,5], [1,2] // rhs: [3,2,1], [4,5], [1,null] ArrayColumn::Ptr lhs; - { - auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); - lhs = ArrayColumn::create(elements, offsets); - } + lhs = ArrayColumn::create(NullableColumn::create(Int32Column::create(), NullColumn::create()), + UInt32Column::create()); { lhs->elements_column()->append_datum(Datum(1)); @@ -561,12 +537,8 @@ PARALLEL_TEST(ArrayColumnTest, equals) { } ArrayColumn::Ptr rhs; - { - auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); - auto nulls = NullColumn ::create(); - rhs = ArrayColumn::create(NullableColumn::create(elements, nulls), offsets); - } + rhs = ArrayColumn::create(NullableColumn::create(Int32Column::create(), NullColumn::create()), + UInt32Column::create()); { rhs->elements_column()->append_datum(Datum(3)); @@ -593,32 +565,37 @@ PARALLEL_TEST(ArrayColumnTest, equals) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_multi_dimension_array) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto offsets_1 = UInt32Column::create(); - auto elements_1 = ArrayColumn::create(elements, offsets); + auto elements_1 = NullableColumn::create(ArrayColumn::create(elements, offsets), NullColumn::create()); auto column = ArrayColumn::create(elements_1, offsets_1); // insert [[1, 2, 3], [4, 5, 6]], [[7], [8], [9]] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); + elements_1->null_column()->append(0); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); + elements_1->null_column()->append(0); offsets->append(6); offsets_1->append(2); - elements->append(7); + elements->append_datum(7); + elements_1->null_column()->append(0); offsets->append(7); - elements->append(8); + elements->append_datum(8); + elements_1->null_column()->append(0); offsets->append(8); - elements->append(9); + elements->append_datum(9); + elements_1->null_column()->append(0); offsets->append(9); offsets_1->append(5); @@ -629,23 +606,23 @@ PARALLEL_TEST(ArrayColumnTest, test_multi_dimension_array) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_resize) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6], [7, 8, 9] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); - elements->append(7); - elements->append(8); - elements->append(9); + elements->append_datum(7); + elements->append_datum(8); + elements->append_datum(9); offsets->append(9); column->resize(1); @@ -656,23 +633,23 @@ PARALLEL_TEST(ArrayColumnTest, test_resize) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_reset_column) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6], [7, 8, 9] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); - elements->append(7); - elements->append(8); - elements->append(9); + elements->append_datum(7); + elements->append_datum(8); + elements->append_datum(9); offsets->append(9); column->reset_column(); @@ -682,33 +659,33 @@ PARALLEL_TEST(ArrayColumnTest, test_reset_column) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_swap_column) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); auto offsets_2 = UInt32Column::create(); - auto elements_2 = Int32Column::create(); + auto elements_2 = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column_2 = ArrayColumn::create(elements_2, offsets_2); // insert [4, 5, 6], [7, 8, 9] - elements_2->append(4); - elements_2->append(5); - elements_2->append(6); + elements_2->append_datum(4); + elements_2->append_datum(5); + elements_2->append_datum(6); offsets_2->append(3); - elements_2->append(7); - elements_2->append(8); - elements_2->append(9); + elements_2->append_datum(7); + elements_2->append_datum(8); + elements_2->append_datum(9); offsets_2->append(6); column->swap_column(*column_2); @@ -718,20 +695,19 @@ PARALLEL_TEST(ArrayColumnTest, test_swap_column) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_copy_constructor) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); ArrayColumn c1(*c0); @@ -744,48 +720,44 @@ PARALLEL_TEST(ArrayColumnTest, test_copy_constructor) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_move_constructor) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); ArrayColumn c1(std::move(*c0)); ASSERT_EQ("[1,2,3]", c1.debug_item(0)); ASSERT_EQ("[4,5,6]", c1.debug_item(1)); - ASSERT_TRUE(c1.elements_column().unique()); - ASSERT_TRUE(c1.offsets_column().unique()); } // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_copy_assignment) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); - ArrayColumn c1(Int32Column::create(), UInt32Column::create()); + ArrayColumn c1(NullableColumn::create(Int32Column::create(), NullColumn::create()), UInt32Column::create()); c1 = *c0; c0->reset_column(); ASSERT_EQ("[1,2,3]", c1.debug_item(0)); @@ -796,23 +768,22 @@ PARALLEL_TEST(ArrayColumnTest, test_copy_assignment) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_move_assignment) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); - ArrayColumn c1(Int32Column ::create(), UInt32Column::create()); + ArrayColumn c1(NullableColumn::create(Int32Column ::create(), NullColumn::create()), UInt32Column::create()); c1 = std::move(*c0); ASSERT_EQ("[1,2,3]", c1.debug_item(0)); ASSERT_EQ("[4,5,6]", c1.debug_item(1)); @@ -822,20 +793,19 @@ PARALLEL_TEST(ArrayColumnTest, test_move_assignment) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_clone) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); auto c1 = c0->clone(); @@ -848,20 +818,19 @@ PARALLEL_TEST(ArrayColumnTest, test_clone) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_clone_shared) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); auto c1 = c0->clone_shared(); @@ -875,20 +844,19 @@ PARALLEL_TEST(ArrayColumnTest, test_clone_shared) { // NOLINTNEXTLINE PARALLEL_TEST(ArrayColumnTest, test_clone_column) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); auto cloned_column = c0->clone_empty(); @@ -899,22 +867,21 @@ PARALLEL_TEST(ArrayColumnTest, test_clone_column) { } PARALLEL_TEST(ArrayColumnTest, test_array_hash) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] size_t array_size_1 = 3; - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); size_t array_size_2 = 3; - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); uint32_t hash_value[2] = {0, 0}; @@ -947,7 +914,7 @@ PARALLEL_TEST(ArrayColumnTest, test_array_hash) { // overflow test for (int i = 0; i < 100000; ++i) { - elements->append(i); + elements->append_datum(i); } offsets->append(elements->size()); uint32_t hash_value_overflow_test[3] = {0, 0, 0}; @@ -966,22 +933,21 @@ PARALLEL_TEST(ArrayColumnTest, test_array_hash) { } PARALLEL_TEST(ArrayColumnTest, test_xor_checksum) { - auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); - - auto* offsets = down_cast(c0->offsets_column().get()); - auto* elements = down_cast(c0->elements_column().get()); + auto offsets = UInt32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto c0 = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6, 7] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); - elements->append(7); - elements->append(8); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); + elements->append_datum(7); + elements->append_datum(8); offsets->append(8); int64_t checksum = c0->xor_checksum(0, 2); @@ -992,43 +958,43 @@ PARALLEL_TEST(ArrayColumnTest, test_xor_checksum) { PARALLEL_TEST(ArrayColumnTest, test_update_rows) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); // append [7, 8, 9] - elements->append(7); - elements->append(8); - elements->append(9); + elements->append_datum(7); + elements->append_datum(8); + elements->append_datum(9); offsets->append(9); // append [10, 11, 12] - elements->append(10); - elements->append(11); - elements->append(12); + elements->append_datum(10); + elements->append_datum(11); + elements->append_datum(12); offsets->append(12); auto offset_col1 = UInt32Column::create(); - auto element_col1 = Int32Column::create(); + auto element_col1 = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto replace_col1 = ArrayColumn::create(element_col1, offset_col1); // insert [101, 102], [103, 104] - element_col1->append(101); - element_col1->append(102); + element_col1->append_datum(101); + element_col1->append_datum(102); offset_col1->append(2); - element_col1->append(103); - element_col1->append(104); + element_col1->append_datum(103); + element_col1->append_datum(104); offset_col1->append(4); std::vector replace_idxes = {1, 3}; @@ -1041,16 +1007,16 @@ PARALLEL_TEST(ArrayColumnTest, test_update_rows) { ASSERT_EQ("[103,104]", column->debug_item(3)); auto offset_col2 = UInt32Column::create(); - auto element_col2 = Int32Column::create(); + auto element_col2 = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto replace_col2 = ArrayColumn::create(element_col2, offset_col2); // insert [201, 202], [203, 204] - element_col2->append(201); - element_col2->append(202); + element_col2->append_datum(201); + element_col2->append_datum(202); offset_col2->append(2); - element_col2->append(203); - element_col2->append(204); + element_col2->append_datum(203); + element_col2->append_datum(204); offset_col2->append(4); ASSERT_TRUE(column->update_rows(*replace_col2.get(), replace_idxes.data()).ok()); @@ -1065,18 +1031,18 @@ PARALLEL_TEST(ArrayColumnTest, test_update_rows) { PARALLEL_TEST(ArrayColumnTest, test_assign) { /// test assign comment arrays auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); // assign @@ -1088,10 +1054,9 @@ PARALLEL_TEST(ArrayColumnTest, test_assign) { ASSERT_EQ("[1,2,3]", column->debug_item(3)); /// test assign [null] - elements = Int32Column::create(); - auto nullable_elements = NullableColumn::create(std::move(elements), NullColumn::create()); - offsets = UInt32Column ::create(); - column = ArrayColumn::create(std::move(nullable_elements), std::move(offsets)); + offsets = UInt32Column::create(); + elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); + column = ArrayColumn::create(std::move(elements), std::move(offsets)); column->append_datum(DatumArray{Datum()}); column->assign(5, 0); @@ -1120,18 +1085,18 @@ PARALLEL_TEST(ArrayColumnTest, test_assign) { PARALLEL_TEST(ArrayColumnTest, test_empty_null_array) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); auto null_map = NullColumn::create(2, 0); @@ -1158,18 +1123,18 @@ PARALLEL_TEST(ArrayColumnTest, test_empty_null_array) { PARALLEL_TEST(ArrayColumnTest, test_replicate) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [1, 2, 3], [4, 5, 6],[] - elements->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(1); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); offsets->append(6); @@ -1192,22 +1157,22 @@ PARALLEL_TEST(ArrayColumnTest, test_replicate) { PARALLEL_TEST(ArrayColumnTest, test_element_memory_usage) { auto offsets = UInt32Column::create(); - auto elements = Int32Column::create(); + auto elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto column = ArrayColumn::create(elements, offsets); // insert [],[1],[2, 3],[4, 5, 6] offsets->append(0); - elements->append(1); + elements->append_datum(1); offsets->append(1); - elements->append(2); - elements->append(3); + elements->append_datum(2); + elements->append_datum(3); offsets->append(3); - elements->append(4); - elements->append(5); - elements->append(6); + elements->append_datum(4); + elements->append_datum(5); + elements->append_datum(6); offsets->append(6); ASSERT_EQ("[]", column->debug_item(0)); @@ -1215,18 +1180,18 @@ PARALLEL_TEST(ArrayColumnTest, test_element_memory_usage) { ASSERT_EQ("[2,3]", column->debug_item(2)); ASSERT_EQ("[4,5,6]", column->debug_item(3)); - ASSERT_EQ(40, column->Column::element_memory_usage()); - - std::vector element_mem_usages = {4, 8, 12, 16}; - size_t element_num = element_mem_usages.size(); - for (size_t start = 0; start < element_num; start++) { - size_t expected_usage = 0; - ASSERT_EQ(expected_usage, column->element_memory_usage(start, 0)); - for (size_t size = 1; start + size <= element_num; size++) { - expected_usage += element_mem_usages[start + size - 1]; - ASSERT_EQ(expected_usage, column->element_memory_usage(start, size)); - } - } + // 1 element occupy 4 + 1 = 5 bytes, 1 offset occupy 4 bytes + ASSERT_EQ(46, column->Column::element_memory_usage()); + ASSERT_EQ(4, column->element_memory_usage(0, 1)); // [] 1 offset, 0 element + ASSERT_EQ(13, column->element_memory_usage(0, 2)); // [][1] 2 offset, 1 element + ASSERT_EQ(27, column->element_memory_usage(0, 3)); // [][1][2, 3] 3 offset, 3 element + ASSERT_EQ(46, column->element_memory_usage(0, 4)); // [][1][2, 3][4, 5, 6] 4 offset, 6 element + ASSERT_EQ(9, column->element_memory_usage(1, 1)); // [1] 1 offset, 1 element + ASSERT_EQ(23, column->element_memory_usage(1, 2)); // [1][2, 3] 2 offset, 3 element + ASSERT_EQ(42, column->element_memory_usage(1, 3)); // [1][2, 3][4, 5, 6] 3 offset, 6 element + ASSERT_EQ(14, column->element_memory_usage(2, 1)); // [2, 3] 1 offset, 2 element + ASSERT_EQ(33, column->element_memory_usage(2, 2)); // [2, 3][4, 5, 6] 2 offset, 5 element + ASSERT_EQ(19, column->element_memory_usage(3, 1)); // [4, 5, 6] 1 offset, 3 element } } // namespace starrocks diff --git a/be/test/column/map_column_test.cpp b/be/test/column/map_column_test.cpp index 3e3b30a74f875..04fd4ac1ddbff 100644 --- a/be/test/column/map_column_test.cpp +++ b/be/test/column/map_column_test.cpp @@ -988,98 +988,6 @@ PARALLEL_TEST(MapColumnTest, test_clone_column) { ASSERT_EQ(1, down_cast(cloned_column.get())->offsets_column()->size()); } -//PARALLEL_TEST(ArrayColumnTest, test_array_hash) { -// auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); -// -// auto* offsets = down_cast(c0->offsets_column().get()); -// auto* elements = down_cast(c0->elements_column().get()); -// -// // insert [1, 2, 3], [4, 5, 6] -// size_t array_size_1 = 3; -// elements->append(1); -// elements->append(2); -// elements->append(3); -// offsets->append(3); -// -// size_t array_size_2 = 3; -// elements->append(4); -// elements->append(5); -// elements->append(6); -// offsets->append(6); -// -// uint32_t hash_value[2] = {0, 0}; -// c0->crc32_hash(hash_value, 0, 2); -// -// uint32_t hash_value_1 = HashUtil::zlib_crc_hash(&array_size_1, sizeof(array_size_1), 0); -// for (int i = 0; i < 3; ++i) { -// elements->crc32_hash(&hash_value_1 - i, i, i + 1); -// } -// uint32_t hash_value_2 = HashUtil::zlib_crc_hash(&array_size_2, sizeof(array_size_2), 0); -// for (int i = 3; i < 6; ++i) { -// elements->crc32_hash(&hash_value_2 - i, i, i + 1); -// } -// ASSERT_EQ(hash_value_1, hash_value[0]); -// ASSERT_EQ(hash_value_2, hash_value[1]); -// -// uint32_t hash_value_fnv[2] = {0, 0}; -// c0->fnv_hash(hash_value_fnv, 0, 2); -// uint32_t hash_value_1_fnv = HashUtil::fnv_hash(&array_size_1, sizeof(array_size_1), 0); -// for (int i = 0; i < 3; ++i) { -// elements->fnv_hash(&hash_value_1_fnv - i, i, i + 1); -// } -// uint32_t hash_value_2_fnv = HashUtil::fnv_hash(&array_size_2, sizeof(array_size_2), 0); -// for (int i = 3; i < 6; ++i) { -// elements->fnv_hash(&hash_value_2_fnv - i, i, i + 1); -// } -// -// ASSERT_EQ(hash_value_1_fnv, hash_value_fnv[0]); -// ASSERT_EQ(hash_value_2_fnv, hash_value_fnv[1]); -// -// // overflow test -// for (int i = 0; i < 100000; ++i) { -// elements->append(i); -// } -// offsets->append(elements->size()); -// uint32_t hash_value_overflow_test[3] = {0, 0, 0}; -// c0->crc32_hash(hash_value_overflow_test, 0, 3); -// -// auto& offset_values = offsets->get_data(); -// size_t sz = offset_values[offset_values.size() - 1] - offset_values[offset_values.size() - 2]; -// -// uint32_t hash_value_overflow = HashUtil::zlib_crc_hash(&sz, sizeof(sz), 0); -// for (int i = 0; i < 100000; ++i) { -// uint32_t value = i; -// hash_value_overflow = HashUtil::zlib_crc_hash(&value, sizeof(value), hash_value_overflow); -// } -// -// ASSERT_EQ(hash_value_overflow, hash_value_overflow_test[2]); -//} -// -//PARALLEL_TEST(ArrayColumnTest, test_xor_checksum) { -// auto c0 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); -// -// auto* offsets = down_cast(c0->offsets_column().get()); -// auto* elements = down_cast(c0->elements_column().get()); -// -// // insert [1, 2, 3], [4, 5, 6, 7] -// elements->append(1); -// elements->append(2); -// elements->append(3); -// offsets->append(3); -// -// elements->append(4); -// elements->append(5); -// elements->append(6); -// elements->append(7); -// elements->append(8); -// offsets->append(8); -// -// int64_t checksum = c0->xor_checksum(0, 2); -// int64_t expected_checksum = 14; -// -// ASSERT_EQ(checksum, expected_checksum); -//} - PARALLEL_TEST(MapColumnTest, test_update_rows) { auto c0 = MapColumn::create(NullableColumn::create(Int32Column::create(), NullColumn::create()), NullableColumn::create(Int32Column::create(), NullColumn::create()), @@ -1251,7 +1159,9 @@ PARALLEL_TEST(MapColumnTest, test_euqals) { auto keys_data = Int32Column::create(); auto keys_null = NullColumn::create(); auto keys = NullableColumn::create(keys_data, keys_null); - auto values = Int32Column::create(); + auto values_data = Int32Column::create(); + auto values_null = NullColumn::create(); + auto values = NullableColumn::create(values_data, values_null); rhs = MapColumn::create(keys, values, offsets); } { diff --git a/be/test/exprs/agg/aggregate_test.cpp b/be/test/exprs/agg/aggregate_test.cpp index 4b6972916783b..c19bc3b84fd5c 100644 --- a/be/test/exprs/agg/aggregate_test.cpp +++ b/be/test/exprs/agg/aggregate_test.cpp @@ -962,7 +962,7 @@ TEST_F(AggregateTest, test_window_funnel) { builder.append(true); builder.append(true); builder.append(true); - auto data_col = builder.build(false); + auto data_col = NullableColumn::create(builder.build(false), NullColumn::create(6, 0)); auto offsets = UInt32Column::create(); offsets->append(2); // [true, true] @@ -1010,7 +1010,7 @@ TEST_F(AggregateTest, test_dict_merge) { builder.append(Slice("starrocks-1")); builder.append(Slice("starrocks-starrocks")); builder.append(Slice("starrocks-starrocks")); - auto data_col = builder.build(false); + auto data_col = NullableColumn::create(builder.build(false), NullColumn::create(5, 0)); auto offsets = UInt32Column::create(); offsets->append(0); @@ -1040,7 +1040,7 @@ TEST_F(AggregateTest, test_dict_merge) { std::set origin_data; std::set ids; - auto binary_column = down_cast(data_col.get()); + auto binary_column = down_cast(data_col->data_column().get()); for (int i = 0; i < binary_column->size(); ++i) { auto slice = binary_column->get_slice(i); origin_data.emplace(slice.data, slice.size); diff --git a/be/test/exprs/array_element_expr_test.cpp b/be/test/exprs/array_element_expr_test.cpp index 4fb1933b8b613..aec31a11accc3 100644 --- a/be/test/exprs/array_element_expr_test.cpp +++ b/be/test/exprs/array_element_expr_test.cpp @@ -250,7 +250,8 @@ TEST_F(ArrayElementExprTest, test_one_dim_array) { { std::unique_ptr expr = create_array_element_expr(type_int); - auto array1 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); + auto array1 = ArrayColumn::create(NullableColumn::create(Int32Column::create(), NullColumn::create()), + UInt32Column::create()); array1->append_datum(Datum(DatumArray())); expr->add_child(new_fake_const_expr(array1, type_array_int)); @@ -275,7 +276,8 @@ TEST_F(ArrayElementExprTest, test_one_dim_array) { { std::unique_ptr expr = create_array_element_expr(type_int); - auto array1 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); + auto array1 = ArrayColumn::create(NullableColumn::create(Int32Column::create(), NullColumn::create()), + UInt32Column::create()); array1->append_datum(Datum(DatumArray())); expr->add_child(new_fake_const_expr(array1, type_array_int)); @@ -300,7 +302,8 @@ TEST_F(ArrayElementExprTest, test_one_dim_array) { { std::unique_ptr expr = create_array_element_expr(type_int); - auto array1 = ArrayColumn::create(Int32Column::create(), UInt32Column::create()); + auto array1 = ArrayColumn::create(NullableColumn::create(Int32Column::create(), NullColumn::create()), + UInt32Column::create()); array1->append_datum(Datum(DatumArray())); expr->add_child(new_fake_const_expr(array1, type_array_int)); diff --git a/be/test/exprs/bitmap_functions_test.cpp b/be/test/exprs/bitmap_functions_test.cpp index 9dfa4342f2f09..ffbe9167d04d9 100644 --- a/be/test/exprs/bitmap_functions_test.cpp +++ b/be/test/exprs/bitmap_functions_test.cpp @@ -1921,8 +1921,10 @@ TEST_F(VecBitmapFunctionsTest, base64ToBitmapTest) { TEST_F(VecBitmapFunctionsTest, array_to_bitmap_test) { auto builder = [](const Buffer& val) { - auto ele_column = Int64Column::create(); - ele_column->append(val); + auto ele_column = NullableColumn::create(Int64Column::create(), NullColumn::create()); + for (auto& v : val) { + ele_column->append_datum(v); + } auto offset_column = UInt32Column::create(); offset_column->append(0); offset_column->append(val.size()); diff --git a/be/test/storage/column_aggregator_test.cpp b/be/test/storage/column_aggregator_test.cpp index e89c6f754694b..7fdf7038a6dad 100644 --- a/be/test/storage/column_aggregator_test.cpp +++ b/be/test/storage/column_aggregator_test.cpp @@ -612,7 +612,7 @@ TEST(ColumnAggregator, testArrayReplace) { FieldPtr field = std::make_shared(1, "test_array", array_type_info, StorageAggregateType::STORAGE_AGGREGATE_REPLACE, 1, false, false); - auto agg_elements = BinaryColumn::create(); + auto agg_elements = NullableColumn::create(BinaryColumn::create(), NullColumn::create()); auto agg_offsets = UInt32Column::create(); auto agg = ArrayColumn::create(agg_elements, agg_offsets); @@ -621,11 +621,11 @@ TEST(ColumnAggregator, testArrayReplace) { std::vector loops; // first chunk column - auto elements = BinaryColumn::create(); + auto elements = NullableColumn::create(BinaryColumn::create(), NullColumn::create()); auto offsets = UInt32Column::create(); auto src = ArrayColumn::create(elements, offsets); for (int i = 0; i < 10; ++i) { - elements->append(Slice(std::to_string(i))); + elements->append_datum(Slice(std::to_string(i))); } offsets->append(2); offsets->append(5); @@ -645,7 +645,7 @@ TEST(ColumnAggregator, testArrayReplace) { // second chunk column src->reset_column(); for (int i = 10; i < 20; ++i) { - elements->append(Slice(std::to_string(i))); + elements->append_datum(Slice(std::to_string(i))); } offsets->append(2); offsets->append(7); @@ -668,7 +668,7 @@ TEST(ColumnAggregator, testArrayReplace) { // third chunk column src->reset_column(); for (int i = 20; i < 30; ++i) { - elements->append(Slice(std::to_string(i))); + elements->append_datum(Slice(std::to_string(i))); } offsets->append(10); diff --git a/be/test/storage/rowset/column_reader_writer_test.cpp b/be/test/storage/rowset/column_reader_writer_test.cpp index 3f1a667fc0f95..f9cb01abf1843 100644 --- a/be/test/storage/rowset/column_reader_writer_test.cpp +++ b/be/test/storage/rowset/column_reader_writer_test.cpp @@ -330,18 +330,18 @@ class ColumnReaderWriterTest : public testing::Test { array_column.add_sub_column(int_column); auto src_offsets = UInt32Column::create(); - auto src_elements = Int32Column::create(); + auto src_elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); ColumnPtr src_column = ArrayColumn::create(src_elements, src_offsets); // insert [1, 2, 3], [4, 5, 6] - src_elements->append(1); - src_elements->append(2); - src_elements->append(3); + src_elements->append_datum(1); + src_elements->append_datum(2); + src_elements->append_datum(3); src_offsets->append(3); - src_elements->append(4); - src_elements->append(5); - src_elements->append(6); + src_elements->append_datum(4); + src_elements->append_datum(5); + src_elements->append_datum(6); src_offsets->append(6); TypeInfoPtr type_info = get_type_info(array_column); @@ -410,7 +410,7 @@ class ColumnReaderWriterTest : public testing::Test { ASSERT_TRUE(st.ok()) << st.to_string(); auto dst_offsets = UInt32Column::create(); - auto dst_elements = Int32Column::create(); + auto dst_elements = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto dst_column = ArrayColumn::create(dst_elements, dst_offsets); size_t rows_read = src_column->size(); st = iter->next_batch(&rows_read, dst_column.get()); @@ -422,7 +422,7 @@ class ColumnReaderWriterTest : public testing::Test { } ASSERT_EQ(2, meta.num_rows()); - ASSERT_EQ(36, reader->total_mem_footprint()); + ASSERT_EQ(42, reader->total_mem_footprint()); } } diff --git a/be/test/storage/rowset/map_column_rw_test.cpp b/be/test/storage/rowset/map_column_rw_test.cpp index 1282cf96a235b..a8035b94c1d24 100644 --- a/be/test/storage/rowset/map_column_rw_test.cpp +++ b/be/test/storage/rowset/map_column_rw_test.cpp @@ -63,30 +63,30 @@ class MapColumnRWTest : public testing::Test { map_column.add_sub_column(value_column); auto src_offsets = UInt32Column::create(); - auto src_keys = Int32Column::create(); - auto src_values = Int32Column::create(); + auto src_keys = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto src_values = NullableColumn::create(Int32Column::create(), NullColumn::create()); ColumnPtr src_column = MapColumn::create(src_keys, src_values, src_offsets); // {1 = 1} - src_keys->append(1); - src_values->append(1); + src_keys->append_datum(1); + src_values->append_datum(1); src_offsets->append(1); // {} src_offsets->append(1); // { 2 = 200, 3 = 3000} - src_keys->append(2); - src_keys->append(3); - src_values->append(200); - src_values->append(3000); + src_keys->append_datum(2); + src_keys->append_datum(3); + src_values->append_datum(200); + src_values->append_datum(3000); src_offsets->append(3); // { 4 = -1, 5 = -2, 6 = -3} - src_keys->append(4); - src_keys->append(5); - src_keys->append(6); - src_values->append(-1); - src_values->append(-2); - src_values->append(-3); + src_keys->append_datum(4); + src_keys->append_datum(5); + src_keys->append_datum(6); + src_values->append_datum(-1); + src_values->append_datum(-2); + src_values->append_datum(-3); src_offsets->append(6); TypeInfoPtr type_info = get_type_info(map_column); @@ -163,8 +163,8 @@ class MapColumnRWTest : public testing::Test { ASSERT_TRUE(st.ok()) << st.to_string(); auto dst_offsets = UInt32Column::create(); - auto dst_keys = Int32Column::create(); - auto dst_values = Int32Column::create(); + auto dst_keys = NullableColumn::create(Int32Column::create(), NullColumn::create()); + auto dst_values = NullableColumn::create(Int32Column::create(), NullColumn::create()); auto dst_column = MapColumn::create(dst_keys, dst_values, dst_offsets); size_t rows_read = src_column->size(); st = iter->next_batch(&rows_read, dst_column.get()); @@ -188,4 +188,4 @@ TEST_F(MapColumnRWTest, test_map_int) { test_int_map(); } -} // namespace starrocks \ No newline at end of file +} // namespace starrocks diff --git a/fe/fe-core/src/main/java/com/starrocks/analysis/CloneExpr.java b/fe/fe-core/src/main/java/com/starrocks/analysis/CloneExpr.java index 735b5fa887e6c..fd78ddd8375b1 100644 --- a/fe/fe-core/src/main/java/com/starrocks/analysis/CloneExpr.java +++ b/fe/fe-core/src/main/java/com/starrocks/analysis/CloneExpr.java @@ -37,6 +37,12 @@ public Type getType() { return getChild(0).getType(); } + @Override + public void setType(Type type) { + super.setType(type); + getChild(0).setType(type); + } + @Override protected String toSqlImpl() { return "clone(" + getChild(0).toSqlImpl() + ")"; diff --git a/fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java b/fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java index 950e9a2ec0c9e..fb6481b31986e 100644 --- a/fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java +++ b/fe/fe-core/src/main/java/com/starrocks/analysis/Expr.java @@ -743,8 +743,8 @@ public interface ExprVisitor { final void treeToThriftHelper(TExpr container, ExprVisitor visitor) { TExprNode msg = new TExprNode(); - Preconditions.checkState(!type.isNull()); - Preconditions.checkState(!Objects.equal(Type.ARRAY_NULL, type)); + Preconditions.checkState(!type.isNull(), "NULL_TYPE is illegal in thrift stage"); + Preconditions.checkState(!Objects.equal(Type.ARRAY_NULL, type), "Array is illegal in thrift stage"); msg.type = type.toThrift(); msg.num_children = children.size(); diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/plan/ScalarOperatorToExpr.java b/fe/fe-core/src/main/java/com/starrocks/sql/plan/ScalarOperatorToExpr.java index 24896861e5308..91085e01f9ccf 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/plan/ScalarOperatorToExpr.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/plan/ScalarOperatorToExpr.java @@ -86,6 +86,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.stream.Collectors; public class ScalarOperatorToExpr { @@ -130,6 +131,23 @@ public static class Formatter extends ScalarOperatorVisitor + * So we need to do some hack when transforming ScalarOperator to Expr. + */ + private static void hackTypeNull(Expr expr) { + // For primitive types, this can be any legitimate type, for simplicity, we pick boolean. + if (expr.getType().isNull()) { + expr.setType(Type.BOOLEAN); + return; + } + + // For array types, itemType can be any legitimate type, for simplicity, we pick boolean. + if (Objects.equals(Type.ARRAY_NULL, expr.getType())) { + expr.setType(Type.ARRAY_BOOLEAN); + } + } + @Override public Expr visit(ScalarOperator scalarOperator, FormatterContext context) { throw new UnsupportedOperationException( @@ -141,47 +159,50 @@ public Expr visitVariableReference(ColumnRefOperator node, FormatterContext cont Expr expr = context.colRefToExpr.get(node); if (context.projectOperatorMap.containsKey(node) && expr == null) { expr = buildExpr.build(context.projectOperatorMap.get(node), context); - if (expr.getType().isNull()) { - // NULL_TYPE hack, this can be any legitimate type, for simplicity, we pick boolean. - expr.setType(Type.BOOLEAN); - } + hackTypeNull(expr); context.colRefToExpr.put(node, expr); return expr; } if (expr.getType().isNull()) { - // NULL_TYPE hack, this can be any legitimate type, for simplicity, we pick boolean. - expr.setType(Type.BOOLEAN); + hackTypeNull(expr); } return expr; } @Override public Expr visitSubfield(SubfieldOperator node, FormatterContext context) { - return new SubfieldExpr(buildExpr.build(node.getChild(0), context), node.getType(), node.getFieldNames()); + SubfieldExpr expr = new SubfieldExpr(buildExpr.build(node.getChild(0), context), node.getType(), + node.getFieldNames()); + hackTypeNull(expr); + return expr; } @Override public Expr visitArray(ArrayOperator node, FormatterContext context) { - // NULL_TYPE hack, itemType can be any legitimate type, for simplicity, we pick boolean. - Type finalType = Type.ARRAY_NULL.equals(node.getType()) ? Type.ARRAY_BOOLEAN : node.getType(); - return new ArrayExpr(finalType, + ArrayExpr expr = new ArrayExpr(node.getType(), node.getChildren().stream().map(e -> buildExpr.build(e, context)).collect(Collectors.toList())); + hackTypeNull(expr); + return expr; } @Override public Expr visitCollectionElement(CollectionElementOperator node, FormatterContext context) { - return new CollectionElementExpr(node.getType(), buildExpr.build(node.getChild(0), context), - buildExpr.build(node.getChild(1), context)); + CollectionElementExpr expr = + new CollectionElementExpr(node.getType(), buildExpr.build(node.getChild(0), context), + buildExpr.build(node.getChild(1), context)); + hackTypeNull(expr); + return expr; } @Override public Expr visitArraySlice(ArraySliceOperator node, FormatterContext context) { - ArraySliceExpr arraySliceExpr = new ArraySliceExpr(buildExpr.build(node.getChild(0), context), + ArraySliceExpr expr = new ArraySliceExpr(buildExpr.build(node.getChild(0), context), buildExpr.build(node.getChild(1), context), buildExpr.build(node.getChild(2), context)); - arraySliceExpr.setType(node.getType()); - return arraySliceExpr; + expr.setType(node.getType()); + hackTypeNull(expr); + return expr; } @Override @@ -190,12 +211,8 @@ public Expr visitConstant(ConstantOperator literal, FormatterContext context) { Type type = literal.getType(); if (literal.isNull()) { NullLiteral nullLiteral = new NullLiteral(); - if (literal.getType().isNull()) { - // NULL_TYPE hack, this can be any legitimate type, for simplicity, we pick boolean. - nullLiteral.setType(Type.BOOLEAN); - } else { - nullLiteral.setType(literal.getType()); - } + nullLiteral.setType(literal.getType()); + hackTypeNull(nullLiteral); nullLiteral.setOriginType(Type.NULL); return nullLiteral; } @@ -381,7 +398,8 @@ public Expr visitLikePredicateOperator(LikePredicateOperator predicate, Formatte expr = new LikePredicate(LikePredicate.Operator.LIKE, child1, child2); } - expr.setFn(Expr.getBuiltinFunction(expr.getOp().name(), new Type[] {child1.getType(), child2.getType()}, + expr.setFn(Expr.getBuiltinFunction(expr.getOp().name(), + new Type[] {child1.getType(), child2.getType()}, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF)); expr.setType(Type.BOOLEAN); @@ -503,13 +521,16 @@ public Expr visitCall(CallOperator call, FormatterContext context) { break; } callExpr.setType(call.getType()); + hackTypeNull(callExpr); return callExpr; } @Override public Expr visitCastOperator(CastOperator operator, FormatterContext context) { - CastExpr expr = new CastExpr(operator.getType(), buildExpr.build(operator.getChild(0), context)); + CastExpr expr = + new CastExpr(operator.getType(), buildExpr.build(operator.getChild(0), context)); expr.setImplicit(context.implicitCast); + hackTypeNull(expr); return expr; } @@ -535,6 +556,7 @@ public Expr visitCaseWhenOperator(CaseWhenOperator operator, FormatterContext co CaseExpr result = new CaseExpr(caseExpr, list, elseExpr); result.setType(operator.getType()); + hackTypeNull(result); return result; } @@ -546,6 +568,7 @@ public Expr visitLambdaFunctionOperator(LambdaFunctionOperator operator, Formatt for (ColumnRefOperator ref : operator.getRefColumns()) { SlotRef slot = new SlotRef(new SlotDescriptor( new SlotId(ref.getId()), ref.getName(), ref.getType(), ref.isNullable())); + hackTypeNull(slot); context.colRefToExpr.put(ref, slot); arguments.add(slot); } @@ -557,6 +580,7 @@ public Expr visitLambdaFunctionOperator(LambdaFunctionOperator operator, Formatt ColumnRefOperator ref = kv.getKey(); SlotRef slot = new SlotRef(new SlotDescriptor( new SlotId(ref.getId()), ref.getName(), ref.getType(), ref.isNullable())); + hackTypeNull(slot); commonSubOperatorMap.put(slot, buildExpr.build(kv.getValue(), context)); context.colRefToExpr.put(ref, slot); } @@ -596,6 +620,7 @@ public Expr visitDictMappingOperator(DictMappingOperator operator, FormatterCont } Expr result = new DictMappingExpr(dictExpr, callExpr); result.setType(operator.getType()); + hackTypeNull(result); return result; } diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ArrayTypeTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ArrayTypeTest.java index a30b1f0333208..8836320b11431 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ArrayTypeTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ArrayTypeTest.java @@ -269,5 +269,21 @@ public void testEmptyArray() throws Exception { "(9223372036854775807,[[9223372036854775807]],[[9223372036854775807]]);\n"; getFragmentPlan(sql); } + { + String sql = "select array_append([],null)"; + getThriftPlan(sql); + } + { + String sql = "select [][1]"; + getThriftPlan(sql); + } + { + String sql = "select array_append([], [])"; + getThriftPlan(sql); + } + { + String sql = "select array_append([[]], [])"; + getThriftPlan(sql); + } } }