Skip to content

Commit

Permalink
Merge pull request #61629 from ClickHouse/cherrypick/24.2/80e195a41ab…
Browse files Browse the repository at this point in the history
…ed9f6a0d88e3d7ea8eebd238ba9ca

Cherry pick #60928 to 24.2: Less copying and a correctness fix for `dotProduct`
  • Loading branch information
robot-ch-test-poll committed Mar 20, 2024
2 parents 60b6946 + 80e195a commit 1aed911
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 145 deletions.
100 changes: 40 additions & 60 deletions src/Functions/array/arrayDistance.cpp
Expand Up @@ -18,11 +18,11 @@ namespace DB
{
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
}

struct L1Distance
Expand Down Expand Up @@ -357,7 +357,7 @@ class FunctionArrayDistance : public IFunction
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Arguments of function {} has nested type {}. "
"Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
"Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
getName(),
common_type->getName());
}
Expand All @@ -379,17 +379,17 @@ class FunctionArrayDistance : public IFunction
}


#define SUPPORTED_TYPES(action) \
action(UInt8) \
action(UInt16) \
action(UInt32) \
action(UInt64) \
action(Int8) \
action(Int16) \
action(Int32) \
action(Int64) \
action(Float32) \
action(Float64)
#define SUPPORTED_TYPES(ACTION) \
ACTION(UInt8) \
ACTION(UInt16) \
ACTION(UInt32) \
ACTION(UInt64) \
ACTION(Int8) \
ACTION(Int16) \
ACTION(Int32) \
ACTION(Int64) \
ACTION(Float32) \
ACTION(Float64)


private:
Expand All @@ -398,12 +398,11 @@ class FunctionArrayDistance : public IFunction
{
DataTypePtr type_x = typeid_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();

/// Dynamic disaptch based on the 1st argument type
switch (type_x->getTypeId())
{
#define ON_TYPE(type) \
case TypeIndex::type: \
return executeWithFirstType<ResultType, type>(arguments, input_rows_count); \
return executeWithResultTypeAndLeftType<ResultType, type>(arguments, input_rows_count); \
break;

SUPPORTED_TYPES(ON_TYPE)
Expand All @@ -413,23 +412,22 @@ class FunctionArrayDistance : public IFunction
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Arguments of function {} has nested type {}. "
"Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
"Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
getName(),
type_x->getName());
}
}

template <typename ResultType, typename FirstArgType>
ColumnPtr executeWithFirstType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
template <typename ResultType, typename LeftType>
ColumnPtr executeWithResultTypeAndLeftType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
{
DataTypePtr type_y = typeid_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();

/// Dynamic disaptch based on the 2nd argument type
switch (type_y->getTypeId())
{
#define ON_TYPE(type) \
case TypeIndex::type: \
return executeWithTypes<ResultType, FirstArgType, type>(arguments[0].column, arguments[1].column, input_rows_count, arguments); \
return executeWithResultTypeAndLeftTypeAndRightType<ResultType, LeftType, type>(arguments[0].column, arguments[1].column, input_rows_count, arguments); \
break;

SUPPORTED_TYPES(ON_TYPE)
Expand All @@ -439,59 +437,43 @@ class FunctionArrayDistance : public IFunction
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Arguments of function {} has nested type {}. "
"Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
"Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
getName(),
type_y->getName());
}
}

template <typename ResultType, typename FirstArgType, typename SecondArgType>
ColumnPtr executeWithTypes(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const
template <typename ResultType, typename LeftType, typename RightType>
ColumnPtr executeWithResultTypeAndLeftTypeAndRightType(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const
{
if (typeid_cast<const ColumnConst *>(col_x.get()))
{
return executeWithTypesFirstArgConst<ResultType, FirstArgType, SecondArgType>(col_x, col_y, input_rows_count, arguments);
return executeWithLeftArgConst<ResultType, LeftType, RightType>(col_x, col_y, input_rows_count, arguments);
}
else if (typeid_cast<const ColumnConst *>(col_y.get()))
{
return executeWithTypesFirstArgConst<ResultType, SecondArgType, FirstArgType>(col_y, col_x, input_rows_count, arguments);
return executeWithLeftArgConst<ResultType, RightType, LeftType>(col_y, col_x, input_rows_count, arguments);
}

col_x = col_x->convertToFullColumnIfConst();
col_y = col_y->convertToFullColumnIfConst();

const auto & array_x = *assert_cast<const ColumnArray *>(col_x.get());
const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());

const auto & data_x = typeid_cast<const ColumnVector<FirstArgType> &>(array_x.getData()).getData();
const auto & data_y = typeid_cast<const ColumnVector<SecondArgType> &>(array_y.getData()).getData();
const auto & data_x = typeid_cast<const ColumnVector<LeftType> &>(array_x.getData()).getData();
const auto & data_y = typeid_cast<const ColumnVector<RightType> &>(array_y.getData()).getData();

const auto & offsets_x = array_x.getOffsets();
const auto & offsets_y = array_y.getOffsets();

/// Check that arrays in both columns are the sames size
for (size_t row = 0; row < offsets_x.size(); ++row)
{
if (offsets_x[row] != offsets_y[row]) [[unlikely]]
{
ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0;
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Arguments of function {} have different array sizes: {} and {}",
getName(),
offsets_x[row] - prev_offset,
offsets_y[row] - prev_offset);
}
}
if (!array_x.hasEqualOffsets(array_y))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Array arguments for function {} must have equal sizes", getName());

const typename Kernel::ConstParams kernel_params = initConstParams(arguments);

auto result = ColumnVector<ResultType>::create(input_rows_count);
auto & result_data = result->getData();
auto col_res = ColumnVector<ResultType>::create(input_rows_count);
auto & result_data = col_res->getData();

/// Do the actual computation
ColumnArray::Offset prev = 0;
size_t row = 0;

for (auto off : offsets_x)
{
/// Process chunks in vectorized manner
Expand All @@ -517,47 +499,45 @@ class FunctionArrayDistance : public IFunction
result_data[row] = Kernel::finalize(state, kernel_params);
row++;
}
return result;
return col_res;
}

/// Special case when the 1st parameter is Const
template <typename ResultType, typename FirstArgType, typename SecondArgType>
ColumnPtr executeWithTypesFirstArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const
template <typename ResultType, typename LeftType, typename RightType>
ColumnPtr executeWithLeftArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const
{
col_x = assert_cast<const ColumnConst *>(col_x.get())->getDataColumnPtr();
col_y = col_y->convertToFullColumnIfConst();

const auto & array_x = *assert_cast<const ColumnArray *>(col_x.get());
const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());

const auto & data_x = typeid_cast<const ColumnVector<FirstArgType> &>(array_x.getData()).getData();
const auto & data_y = typeid_cast<const ColumnVector<SecondArgType> &>(array_y.getData()).getData();
const auto & data_x = typeid_cast<const ColumnVector<LeftType> &>(array_x.getData()).getData();
const auto & data_y = typeid_cast<const ColumnVector<RightType> &>(array_y.getData()).getData();

const auto & offsets_x = array_x.getOffsets();
const auto & offsets_y = array_y.getOffsets();

/// Check that arrays in both columns are the sames size
ColumnArray::Offset prev_offset = 0;
for (size_t row : collections::range(0, offsets_y.size()))
for (auto offset_y : offsets_y)
{
if (offsets_x[0] != offsets_y[row] - prev_offset) [[unlikely]]
if (offsets_x[0] != offset_y - prev_offset) [[unlikely]]
{
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Arguments of function {} have different array sizes: {} and {}",
getName(),
offsets_x[0],
offsets_y[row] - prev_offset);
offset_y - prev_offset);
}
prev_offset = offsets_y[row];
prev_offset = offset_y;
}

const typename Kernel::ConstParams kernel_params = initConstParams(arguments);

auto result = ColumnVector<ResultType>::create(input_rows_count);
auto & result_data = result->getData();

/// Do the actual computation
size_t prev = 0;
size_t row = 0;

Expand All @@ -574,7 +554,7 @@ class FunctionArrayDistance : public IFunction
/// - the two most common metrics L2 and cosine distance,
/// - the most powerful SIMD instruction set (AVX-512F).
#if USE_MULTITARGET_CODE
if constexpr (std::is_same_v<ResultType, FirstArgType> && std::is_same_v<ResultType, SecondArgType>) /// ResultType is Float32 or Float64
if constexpr (std::is_same_v<ResultType, LeftType> && std::is_same_v<ResultType, RightType>) /// ResultType is Float32 or Float64
{
if constexpr (std::is_same_v<Kernel, L2Distance>
|| std::is_same_v<Kernel, CosineDistance>)
Expand Down

0 comments on commit 1aed911

Please sign in to comment.