diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 51f2ef659cdd..cea407aee027 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -1,23 +1,25 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include +#include +#include #include -#include #include -#include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include namespace DB @@ -46,7 +48,7 @@ class FunctionArrayElement : public IFunction { public: static constexpr auto name = "arrayElement"; - static FunctionPtr create(ContextPtr context); + static FunctionPtr create(ContextPtr context_); String getName() const override; @@ -63,16 +65,49 @@ class FunctionArrayElement : public IFunction ArrayImpl::NullMapBuilder & builder, size_t input_rows_count) const; template - static ColumnPtr executeNumberConst(const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder); + static ColumnPtr executeNumberConst( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const Field & index, + ArrayImpl::NullMapBuilder & builder); + + template + static ColumnPtr executeNumber( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const PaddedPODArray & indices, + ArrayImpl::NullMapBuilder & builder); + + /// Optimize arrayElement when first argument has type Array(Array(T)) or Array(Array(Nullable(T))) in which T is number type + template + static ColumnPtr executeArrayNumberConst( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const Field & index, + ArrayImpl::NullMapBuilder & builder); + /// Optimize arrayElement when first argument has type Array(Array(T)) or Array(Array(Nullable(T))) in which T is number type template - static ColumnPtr 
executeNumber(const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); + static ColumnPtr executeArrayNumber( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const PaddedPODArray & indices, + ArrayImpl::NullMapBuilder & builder); static ColumnPtr executeStringConst(const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder); template static ColumnPtr executeString(const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); + /// Optimize arrayElement when first argument has type Array(Array(String)) or Array(Array(Nullable(String))) + static ColumnPtr + executeArrayStringConst(const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder); + + /// Optimize arrayElement when first argument has type Array(Array(String)) or Array(Array(Nullable(String))) + template + static ColumnPtr executeArrayString( + const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder); + static ColumnPtr executeGenericConst(const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder); template @@ -91,6 +126,10 @@ class FunctionArrayElement : public IFunction */ ColumnPtr executeTuple(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const; + /** For a map array, the function is evaluated component-wise for its keys and values + */ + ColumnPtr executeMap2(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const; + /** For a map the function finds the matched value for a key. * Currently implemented just as linear search in array. * However, optimizations are possible. 
@@ -123,6 +162,8 @@ class FunctionArrayElement : public IFunction static void executeMatchConstKeyToIndex( size_t num_rows, size_t num_values, PaddedPODArray & matched_idxs, const Matcher & matcher); + + ContextPtr context; }; @@ -181,9 +222,11 @@ struct ArrayElementNumImpl */ template static void vectorConst( - const PaddedPODArray & data, const ColumnArray::Offsets & offsets, + const PaddedPODArray & data, + const ColumnArray::Offsets & offsets, const ColumnArray::Offset index, - PaddedPODArray & result, ArrayImpl::NullMapBuilder & builder) + PaddedPODArray & result, + ArrayImpl::NullMapBuilder & builder) { size_t size = offsets.size(); result.resize(size); @@ -220,9 +263,11 @@ struct ArrayElementNumImpl */ template static void vector( - const PaddedPODArray & data, const ColumnArray::Offsets & offsets, + const PaddedPODArray & data, + const ColumnArray::Offsets & offsets, const PaddedPODArray & indices, - PaddedPODArray & result, ArrayImpl::NullMapBuilder & builder) + PaddedPODArray & result, + ArrayImpl::NullMapBuilder & builder) { size_t size = offsets.size(); result.resize(size); @@ -262,6 +307,354 @@ struct ArrayElementNumImpl } }; + +template +struct ArrayElementArrayNumImpl +{ + /** Implementation for constant index. + * If negative = false - index is from beginning of array, started from 0. + * If negative = true - index is from end of array, started from 0. 
+ * nullable_number is true when process input with type Array(Array(Nullable(T))) + */ + template + static void vectorConst( + const PaddedPODArray & data, + const ColumnArray::Offsets & offsets, + const ColumnArray::Offsets & nested_offsets, + const NullMap * number_null_map, + const ColumnArray::Offset index, + PaddedPODArray & result_data, + ColumnArray::Offsets & result_offsets, + NullMap * result_number_null_map, + ArrayImpl::NullMapBuilder & builder) + { + size_t size = offsets.size(); + + /// First calculate the size of result_data or result_number_null_map + size_t result_data_size = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + if (index < array_size) + { + size_t j = !negative ? (offsets[i - 1] + index) : (offsets[i] - index - 1); + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j-1]; + result_data_size += nested_array_size; + } + } + + /// Allocate enough memory in advance + result_data.resize(result_data_size); + result_offsets.resize(size); + if constexpr (nullable_number) + result_number_null_map->resize(result_data_size); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + if (index < array_size) + { + size_t j = !negative ? 
(offsets[i - 1] + index) : (offsets[i] - index - 1); + if (builder) + builder.update(j); + + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j-1]; + ColumnArray::Offset nested_array_pos = nested_offsets[j-1]; + memcpy(&result_data[current_offset], &data[nested_array_pos], nested_array_size * sizeof(T)); + if constexpr (nullable_number) + memcpy(&(*result_number_null_map)[current_offset], &(*number_null_map)[nested_array_pos], nested_array_size); + + current_offset += nested_array_size; + } + else + { + /// Empty Array(T), no need to copy anything + if (builder) + builder.update(); + } + + result_offsets[i] = current_offset; + } + } + + /** Implementation for non-constant index. + */ + template + static void vector( + const PaddedPODArray & data, + const ColumnArray::Offsets & offsets, + const ColumnArray::Offsets & nested_offsets, + const NullMap * number_null_map, + const PaddedPODArray & indices, + PaddedPODArray & result_data, + ColumnArray::Offsets & result_offsets, + NullMap * result_number_null_map, + ArrayImpl::NullMapBuilder & builder) + { + size_t size = offsets.size(); + + /// First calculate the size of result_data or result_number_null_map + size_t result_data_size = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + { + size_t j = offsets[i - 1] + index - 1; + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j - 1]; + result_data_size += nested_array_size; + } + else if (index < 0 && -static_cast(index) <= array_size) + { + size_t j = offsets[i] + index; + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j - 1]; + result_data_size += nested_array_size; + } + } + + /// Allocate enough memory in advance + result_data.resize(result_data_size); + result_offsets.resize(size); + if constexpr (nullable_number) + 
result_number_null_map->resize(result_data_size); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i - 1]; + + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + { + size_t j = offsets[i - 1] + index - 1; + if (builder) + builder.update(j); + + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j-1]; + ColumnArray::Offset nested_array_pos = nested_offsets[j-1]; + memcpy(&result_data[current_offset], &data[nested_array_pos], nested_array_size * sizeof(T)); + if constexpr (nullable_number) + memcpy(&(*result_number_null_map)[current_offset], &(*number_null_map)[nested_array_pos], nested_array_size); + + current_offset += nested_array_size; + } + else if (index < 0 && -static_cast(index) <= array_size) + { + size_t j = offsets[i] + index; + if (builder) + builder.update(j); + + ColumnArray::Offset nested_array_size = nested_offsets[j] - nested_offsets[j - 1]; + ColumnArray::Offset nested_array_pos = nested_offsets[j-1]; + memcpy(&result_data[current_offset], &data[nested_array_pos], nested_array_size * sizeof(T)); + if constexpr (nullable_number) + memcpy(&(*result_number_null_map)[current_offset], &(*number_null_map)[nested_array_pos], nested_array_size); + + current_offset += nested_array_size; + } + else + { + /// Empty Array(T), no need to copy anything + if (builder) + builder.update(); + } + + result_offsets[i] = current_offset; + } + } +}; + +struct ArrayElementArrayStringImpl +{ + /// nullable_string is true when process input with type Array(Array(Nullable(String))) + template + static void vectorConst( + const ColumnString::Chars & data, + const ColumnArray::Offsets & offsets, + const ColumnArray::Offsets & nested_offsets, + const ColumnString::Offsets & string_offsets, + const NullMap * string_null_map, + const ColumnArray::Offset index, + ColumnString::Chars & result_data, + ColumnArray::Offsets & result_offsets, + 
ColumnArray::Offsets & result_string_offsets, + NullMap * result_string_null_map, + ArrayImpl::NullMapBuilder & builder) + { + size_t size = offsets.size(); + + /// First calculate size of result_data(total size of strings) and result_string_offsets(total count of strings) + size_t result_data_size = 0; + size_t result_strings_size = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + if (index < array_size) + { + size_t adjusted_index = !negative ? index : (array_size - index - 1); + size_t j = offsets[i - 1] + adjusted_index; + + auto nested_array_start = nested_offsets[j - 1]; + auto nested_array_size = nested_offsets[j] - nested_array_start; + + result_data_size += string_offsets[nested_array_start + nested_array_size - 1] - string_offsets[nested_array_start - 1]; + result_strings_size += nested_array_size; + } + } + + /// Allocate enough memory in advance + result_data.resize(result_data_size); + result_offsets.resize(size); + result_string_offsets.reserve(result_strings_size); + if constexpr (nullable_string) + result_string_null_map->reserve(result_strings_size); + + ColumnArray::Offset current_offset = 0; + ColumnArray::Offset current_string_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + + if (index < array_size) + { + size_t adjusted_index = !negative ? 
index : (array_size - index - 1); + + size_t j = offsets[i - 1] + adjusted_index; + if (builder) + builder.update(j); + + auto nested_array_start = nested_offsets[j - 1]; + auto nested_array_size = nested_offsets[j] - nested_array_start; + + /// For each String in Array(String), append it to result_data and update result_offsets and result_string_offsets + for (size_t k = 0; k < nested_array_size; ++k) + { + auto string_start = string_offsets[nested_array_start + k - 1]; + auto string_size = string_offsets[nested_array_start + k] - string_start; + memcpySmallAllowReadWriteOverflow15(&result_data[current_string_offset], &data[string_start], string_size); + current_string_offset += string_size; + result_string_offsets.push_back(current_string_offset); + + if constexpr (nullable_string) + result_string_null_map->push_back((*string_null_map)[nested_array_start + k]); + } + current_offset += nested_array_size; + } + else + { + /// Insert empty Array(String) or Array(Nullable(String)), no need to copy anything + if (builder) + builder.update(); + } + result_offsets[i] = current_offset; + } + } + + /** Implementation for non-constant index. 
+ */ + template + static void vector( + const ColumnString::Chars & data, + const ColumnArray::Offsets & offsets, + const ColumnArray::Offsets & nested_offsets, + const ColumnString::Offsets & string_offsets, + const NullMap * string_null_map, + const PaddedPODArray & indices, + ColumnString::Chars & result_data, + ColumnArray::Offsets & result_offsets, + ColumnArray::Offsets & result_string_offsets, + NullMap * result_string_null_map, + ArrayImpl::NullMapBuilder & builder) + { + size_t size = offsets.size(); + + /// First calculate size of result_data(total size of strings) and result_string_offsets(total count of strings) + size_t result_data_size = 0; + size_t result_strings_size = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + size_t adjusted_index; /// index in array from zero + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + adjusted_index = index - 1; + else if (index < 0 && -static_cast(index) <= array_size) + adjusted_index = array_size + index; + else + adjusted_index = array_size; /// means no element should be taken + + if (adjusted_index < array_size) + { + size_t j = offsets[i - 1] + adjusted_index; + + auto nested_array_start = nested_offsets[j - 1]; + auto nested_array_size = nested_offsets[j] - nested_array_start; + + result_data_size += string_offsets[nested_array_start + nested_array_size - 1] - string_offsets[nested_array_start - 1]; + result_strings_size += nested_array_size; + } + } + + /// Allocate enough memory in advance + result_data.resize(result_data_size); + result_offsets.resize(size); + result_string_offsets.reserve(result_strings_size); + if constexpr (nullable_string) + result_string_null_map->reserve(result_strings_size); + + ColumnArray::Offset current_offset = 0; + ColumnArray::Offset current_string_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - offsets[i-1]; + size_t adjusted_index; /// index in array from 
zero + + TIndex index = indices[i]; + if (index > 0 && static_cast(index) <= array_size) + adjusted_index = index - 1; + else if (index < 0 && -static_cast(index) <= array_size) + adjusted_index = array_size + index; + else + adjusted_index = array_size; /// means no element should be taken + + + if (adjusted_index < array_size) + { + size_t j = offsets[i - 1] + adjusted_index; + if (builder) + builder.update(j); + + auto nested_array_start = nested_offsets[j - 1]; + auto nested_array_size = nested_offsets[j] - nested_array_start; + + /// For each String in Array(String), append it to result_data and update result_offsets and result_string_offsets + for (size_t k = 0; k < nested_array_size; ++k) + { + auto string_start = string_offsets[nested_array_start + k - 1]; + auto string_size = string_offsets[nested_array_start + k] - string_start; + memcpySmallAllowReadWriteOverflow15(&result_data[current_string_offset], &data[string_start], string_size); + current_string_offset += string_size; + result_string_offsets.push_back(current_string_offset); + + if constexpr (nullable_string) + result_string_null_map->push_back((*string_null_map)[nested_array_start + k]); + } + current_offset += nested_array_size; + } + else + { + /// Insert empty Array(String), no need to copy anything + if (builder) + builder.update(); + } + result_offsets[i] = current_offset; + } + } +}; + + struct ArrayElementStringImpl { template @@ -495,25 +888,28 @@ FunctionPtr FunctionArrayElement::create(ContextPtr) template ColumnPtr FunctionArrayElement::executeNumberConst( - const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder) + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const Field & index, ArrayImpl::NullMapBuilder & builder) { - const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + using ColVecType = ColumnVectorOrDecimal; + const ColumnArray * col_array = 
checkAndGetColumn(arguments[0].column.get()); if (!col_array) return nullptr; - const ColumnVector * col_nested = checkAndGetColumn>(&col_array->getData()); - + const ColVecType * col_nested = checkAndGetColumn(&col_array->getData()); if (!col_nested) return nullptr; - auto col_res = ColumnVector::create(); + auto col_res = result_type->createColumn(); + ColVecType * col_res_vec = typeid_cast(col_res.get()); + if (!col_res_vec) + return nullptr; if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.get() >= 0)) { ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), index.get() - 1, col_res->getData(), builder); + col_nested->getData(), col_array->getOffsets(), index.get() - 1, col_res_vec->getData(), builder); } else if (index.getType() == Field::Types::Int64) { @@ -525,9 +921,8 @@ ColumnPtr FunctionArrayElement::executeNumberConst( /// Negative array indices work this way: /// arr[-1] is the element at offset 0 from the last /// arr[-2] is the element at offset 1 from the last and so on. 
- ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), -(static_cast(index.safeGet()) + 1), col_res->getData(), builder); + col_nested->getData(), col_array->getOffsets(), -(static_cast(index.safeGet()) + 1), col_res_vec->getData(), builder); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); @@ -537,22 +932,28 @@ ColumnPtr FunctionArrayElement::executeNumberConst( template ColumnPtr FunctionArrayElement::executeNumber( - const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder) + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const PaddedPODArray & indices, + ArrayImpl::NullMapBuilder & builder) { - const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + using ColVecType = ColumnVectorOrDecimal; + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); if (!col_array) return nullptr; - const ColumnVector * col_nested = checkAndGetColumn>(&col_array->getData()); - + const ColVecType * col_nested = checkAndGetColumn(&col_array->getData()); if (!col_nested) return nullptr; - auto col_res = ColumnVector::create(); + auto col_res = result_type->createColumn(); + ColVecType * col_res_vec = typeid_cast(col_res.get()); + if (!col_res_vec) + return nullptr; ArrayElementNumImpl::template vector( - col_nested->getData(), col_array->getOffsets(), indices, col_res->getData(), builder); + col_nested->getData(), col_array->getOffsets(), indices, col_res_vec->getData(), builder); return col_res; } @@ -621,6 +1022,268 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument return col_res; } +ColumnPtr FunctionArrayElement::executeArrayStringConst( + const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder) +{ + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) + return 
nullptr; + + const ColumnArray * col_nested_array = checkAndGetColumn(&col_array->getData()); + if (!col_nested_array) + return nullptr; + + const ColumnString * col_nested_elem = nullptr; + const auto * col_nullable = checkAndGetColumn(col_nested_array->getDataPtr().get()); + if (!col_nullable) + col_nested_elem = checkAndGetColumn(col_nested_array->getDataPtr().get()); + else + col_nested_elem = checkAndGetColumn(col_nullable->getNestedColumnPtr().get()); + + if (!col_nested_elem) + return nullptr; + + const auto * string_null_map = col_nullable ? &col_nullable->getNullMapColumn() : nullptr; + auto res_string = ColumnString::create(); + auto res_offsets = ColumnArray::ColumnOffsets::create(); + auto res_string_null_map = col_nullable ? ColumnUInt8::create() : nullptr; + if (index.getType() == Field::Types::UInt64 + || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + { + if (col_nullable) + ArrayElementArrayStringImpl::vectorConst( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + &string_null_map->getData(), + index.get() - 1, + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + &res_string_null_map->getData(), + builder); + else + ArrayElementArrayStringImpl::vectorConst( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + nullptr, + index.get() - 1, + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + nullptr, + builder); + } + else if (index.getType() == Field::Types::Int64) + { + if (col_nullable) + ArrayElementArrayStringImpl::vectorConst( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + &string_null_map->getData(), + -(UInt64(index.get()) + 1), + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + &res_string_null_map->getData(), + 
builder); + else + ArrayElementArrayStringImpl::vectorConst( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + nullptr, + -(UInt64(index.get()) + 1), + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + nullptr, + builder); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); + + if (col_nullable) + return ColumnArray::create(ColumnNullable::create(std::move(res_string), std::move(res_string_null_map)), std::move(res_offsets)); + else + return ColumnArray::create(std::move(res_string), std::move(res_offsets)); +} + +template +ColumnPtr FunctionArrayElement::executeArrayNumberConst( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const Field & index, ArrayImpl::NullMapBuilder & builder) +{ + using ColVecType = ColumnVectorOrDecimal; + + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const ColumnArray * col_nested_array = checkAndGetColumn(col_array->getDataPtr().get()); + if (!col_nested_array) + return nullptr; + + const ColVecType * col_nested_elem = nullptr; + const auto * col_nullable = checkAndGetColumn(col_nested_array->getDataPtr().get()); + if (!col_nullable) + col_nested_elem = checkAndGetColumn(&col_nested_array->getData()); + else + col_nested_elem = checkAndGetColumn(col_nullable->getNestedColumnPtr().get()); + + if (!col_nested_elem) + return nullptr; + + auto res = result_type->createColumn(); + ColumnArray * res_array = typeid_cast(res.get()); + if (!res_array) + return nullptr; + + ColVecType * res_data = nullptr; + ColumnNullable * res_nullable = typeid_cast(&res_array->getData()); + if (!res_nullable) + res_data = typeid_cast(&res_array->getData()); + else + res_data = typeid_cast(&res_nullable->getNestedColumn()); + + const NullMap * null_map = col_nullable ? 
&col_nullable->getNullMapData() : nullptr; + auto & res_offsets = res_array->getOffsets(); + NullMap * res_null_map = res_nullable ? &res_nullable->getNullMapData() : nullptr; + + if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + { + if (col_nullable) + ArrayElementArrayNumImpl::template vectorConst( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + index.get() - 1, + res_data->getData(), + res_offsets, + res_null_map, + builder); + else + ArrayElementArrayNumImpl::template vectorConst( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + index.get() - 1, + res_data->getData(), + res_offsets, + res_null_map, + builder); + } + else if (index.getType() == Field::Types::Int64) + { + /// Cast to UInt64 before negation allows to avoid undefined behaviour for negation of the most negative number. + /// NOTE: this would be undefined behaviour in C++ sense, but nevertheless, compiler cannot see it on user provided data, + /// and generates the code that we want on supported CPU architectures (overflow in sense of two's complement arithmetic). + /// This is only needed to avoid UBSan report. + + /// Negative array indices work this way: + /// arr[-1] is the element at offset 0 from the last + /// arr[-2] is the element at offset 1 from the last and so on. 
+ if (col_nullable) + ArrayElementArrayNumImpl::template vectorConst( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + -(static_cast(index.safeGet()) + 1), + res_data->getData(), + res_offsets, + res_null_map, + builder); + else + ArrayElementArrayNumImpl::template vectorConst( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + -(static_cast(index.safeGet()) + 1), + res_data->getData(), + res_offsets, + res_null_map, + builder); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); + + return res; +} + +template +ColumnPtr FunctionArrayElement::executeArrayNumber( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder) +{ + using ColVecType = ColumnVectorOrDecimal; + + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const ColumnArray * col_nested_array = checkAndGetColumn(col_array->getDataPtr().get()); + if (!col_nested_array) + return nullptr; + + const ColVecType * col_nested_elem = nullptr; + const auto * col_nullable = checkAndGetColumn(col_nested_array->getDataPtr().get()); + if (!col_nullable) + col_nested_elem = checkAndGetColumn(&col_nested_array->getData()); + else + col_nested_elem = checkAndGetColumn(col_nullable->getNestedColumnPtr().get()); + + if (!col_nested_elem) + return nullptr; + + MutableColumnPtr res = result_type->createColumn(); + ColumnArray * res_array = typeid_cast(res.get()); + if (!res_array) + return nullptr; + + ColVecType * res_data = nullptr; + ColumnNullable * res_nullable = typeid_cast(&res_array->getData()); + if (!res_nullable) + res_data = typeid_cast(&res_array->getData()); + else + res_data = typeid_cast(&res_nullable->getNestedColumn()); + + const NullMap * null_map = col_nullable ? 
&col_nullable->getNullMapData() : nullptr; + auto & res_offsets = res_array->getOffsets(); + NullMap * res_null_map = res_nullable ? &res_nullable->getNullMapData() : nullptr; + + if (col_nullable) + ArrayElementArrayNumImpl::template vector( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + indices, + res_data->getData(), + res_offsets, + res_null_map, + builder); + else + ArrayElementArrayNumImpl::template vector( + col_nested_elem->getData(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + null_map, + indices, + res_data->getData(), + res_offsets, + res_null_map, + builder); + return res; +} + template ColumnPtr FunctionArrayElement::executeString( const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder) @@ -659,6 +1322,66 @@ ColumnPtr FunctionArrayElement::executeString( return col_res; } +template +ColumnPtr FunctionArrayElement::executeArrayString( + const ColumnsWithTypeAndName & arguments, const PaddedPODArray & indices, ArrayImpl::NullMapBuilder & builder) +{ + const ColumnArray * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const ColumnArray * col_nested_array = checkAndGetColumn(&col_array->getData()); + if (!col_nested_array) + return nullptr; + + const ColumnString * col_nested_elem = nullptr; + const auto * col_nullable = checkAndGetColumn(col_nested_array->getDataPtr().get()); + if (!col_nullable) + col_nested_elem = checkAndGetColumn(&col_nested_array->getData()); + else + col_nested_elem = checkAndGetColumn(col_nullable->getNestedColumnPtr().get()); + + if (!col_nested_elem) + return nullptr; + + const auto * string_null_map = col_nullable ? &col_nullable->getNullMapColumn() : nullptr; + auto res_string = ColumnString::create(); + auto res_offsets = ColumnArray::ColumnOffsets::create(); + auto res_string_null_map = col_nullable ? 
ColumnUInt8::create() : nullptr; + + if (col_nullable) + ArrayElementArrayStringImpl::vector( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + &string_null_map->getData(), + indices, + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + &res_string_null_map->getData(), + builder); + else + ArrayElementArrayStringImpl::vector( + col_nested_elem->getChars(), + col_array->getOffsets(), + col_nested_array->getOffsets(), + col_nested_elem->getOffsets(), + nullptr, + indices, + res_string->getChars(), + res_offsets->getData(), + res_string->getOffsets(), + nullptr, + builder); + + if (col_nullable) + return ColumnArray::create(ColumnNullable::create(std::move(res_string), std::move(res_string_null_map)), std::move(res_offsets)); + else + return ColumnArray::create(std::move(res_string), std::move(res_offsets)); +} + ColumnPtr FunctionArrayElement::executeGenericConst( const ColumnsWithTypeAndName & arguments, const Field & index, ArrayImpl::NullMapBuilder & builder) { @@ -758,34 +1481,119 @@ ColumnPtr FunctionArrayElement::executeArgument( builder.initSink(index_data.size()); ColumnPtr res; - if (!((res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeNumber(arguments, index_data, builder)) - || (res = executeConst(arguments, result_type, index_data, builder, input_rows_count)) - || (res = executeString(arguments, index_data, builder)) - || (res = executeGeneric(arguments, index_data, 
builder)))) + if (!((res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeNumber(arguments, result_type, index_data, builder)) + || (res = executeConst(arguments, result_type, index_data, builder, input_rows_count)) + || (res = executeString(arguments, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || 
(res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayNumber(arguments, result_type, index_data, builder)) + || (res = executeArrayString(arguments, index_data, builder)) + || (res = executeGeneric(arguments, index_data, builder)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); return res; } -ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const +ColumnPtr FunctionArrayElement::executeMap2(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { const ColumnArray * col_array = typeid_cast(arguments[0].column.get()); + if (!col_array) + return nullptr; + + const ColumnMap * col_map = typeid_cast(&col_array->getData()); + if (!col_map) + return nullptr; + const ColumnArray * col_map_nested = &col_map->getNestedColumn(); + const ColumnTuple * col_map_kv = checkAndGetColumn(col_map_nested->getDataPtr().get()); + ColumnPtr col_map_keys = col_map_kv->getColumnPtr(0); + ColumnPtr col_map_values = col_map_kv->getColumnPtr(1); 
+ + const DataTypeMap & map_type + = typeid_cast(*typeid_cast(*arguments[0].type).getNestedType()); + const auto & key_type = map_type.getKeyType(); + const auto & value_type = map_type.getValueType(); + + ColumnsWithTypeAndName temporary_results(2); + temporary_results[1] = arguments[1]; + + ColumnPtr result_key_column; + /// Calculate the function for the keys of the map. + { + ColumnWithTypeAndName array_of_keys; + array_of_keys.column + = ColumnArray::create(ColumnArray::create(col_map_keys, col_map_nested->getOffsetsPtr()), col_array->getOffsetsPtr()); + array_of_keys.type = std::make_shared(std::make_shared(key_type)); + temporary_results[0] = std::move(array_of_keys); + + auto type = getReturnTypeImpl({temporary_results[0].type, temporary_results[1].type}); + auto col = executeImpl(temporary_results, type, input_rows_count); + result_key_column = std::move(col); + } + + /// Calculate the function for the values of the map + ColumnPtr result_value_column; + { + ColumnWithTypeAndName array_of_values; + array_of_values.column + = ColumnArray::create(ColumnArray::create(col_map_values, col_map_nested->getOffsetsPtr()), col_array->getOffsetsPtr()); + array_of_values.type = std::make_shared(std::make_shared(value_type)); + temporary_results[0] = std::move(array_of_values); + + auto type = getReturnTypeImpl({temporary_results[0].type, temporary_results[1].type}); + auto col = executeImpl(temporary_results, type, input_rows_count); + result_value_column = std::move(col); + } + + const auto & data_keys = typeid_cast(*result_key_column).getDataPtr(); + const auto & data_values = typeid_cast(*result_value_column).getDataPtr(); + const auto & offsets = typeid_cast(*result_key_column).getOffsetsPtr(); + auto result_nested_column = ColumnArray::create(ColumnTuple::create(Columns{data_keys, data_values}), offsets); + return ColumnMap::create(std::move(result_nested_column)); +} + +ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & arguments, size_t 
input_rows_count) const +{ + const ColumnArray * col_array = typeid_cast(arguments[0].column.get()); if (!col_array) return nullptr; const ColumnTuple * col_nested = typeid_cast(&col_array->getData()); - if (!col_nested) return nullptr; @@ -1275,6 +2083,8 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments ColumnPtr res; if ((res = executeTuple(arguments, input_rows_count))) return res; + else if ((res = executeMap2(arguments, input_rows_count))) + return res; else if (!isColumnConst(*arguments[1].column)) { if (!((res = executeArgument(arguments, result_type, builder, input_rows_count)) @@ -1300,20 +2110,52 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments if (index == 0u) throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Array indices are 1-based"); - if (!((res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeNumberConst(arguments, index, builder)) - || (res = executeStringConst (arguments, index, builder)) - || (res = executeGenericConst (arguments, index, builder)))) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), getName()); + if (!((res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = 
executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeNumberConst(arguments, result_type, index, builder)) + || (res = executeStringConst(arguments, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = 
executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayNumberConst(arguments, result_type, index, builder)) + || (res = executeArrayStringConst(arguments, index, builder)) + || (res = executeGenericConst(arguments, index, builder)))) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), + getName()); } return res; diff --git a/tests/performance/array_element.xml b/tests/performance/array_element.xml index 1f82b8333805..c3641f426f3e 100644 --- a/tests/performance/array_element.xml +++ b/tests/performance/array_element.xml @@ -2,4 +2,20 @@ SELECT count() FROM numbers(100000000) WHERE NOT ignore([[1], [2]][number % 2 + 2]) SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], [2]][number % 2 + 2]) SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], []][number % 2 + 2]) + + + select materialize(array(array(1,2,3,4)))[1] from numbers(10000000) format Null + select materialize(array(array(1,2,3,4)))[materialize(1)] from numbers(10000000) format Null + + + select materialize(array(array('hello', 'world')))[1] from numbers(10000000) format Null + select materialize(array(array('hello', 'world')))[materialize(1)] from numbers(10000000) format Null + + + select materialize(array(map('hello', 1, 'world', 2)))[1] from numbers(10000000) format Null + select materialize(array(map('hello', 1, 'world', 2)))[materialize(1)] from numbers(10000000) format Null + + + select 
materialize(array(1.23::Decimal256(2), 4.56::Decimal256(2)))[1] from numbers(10000000) format Null + select materialize(array(1.23::Decimal256(2), 4.56::Decimal256(2)))[materialize(1)] from numbers(10000000) format Null