Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speedup MIN and MAX for native types #58231

Merged
merged 2 commits into from Dec 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
118 changes: 115 additions & 3 deletions src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -1,7 +1,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/FactoryHelpers.h>

#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/findNumeric.h>

namespace DB
{
Expand All @@ -10,10 +10,122 @@ struct Settings;
namespace
{

/// `max` flavour of AggregateFunctionsSingleValue.
/// The class only re-declares the two batch entry points so that they can be
/// explicitly specialized outside the class for native numeric types; the generic
/// out-of-class definitions simply forward to Parent.
template <typename Data>
class AggregateFunctionsSingleValueMax final : public AggregateFunctionsSingleValue<Data>
{
    using Parent = AggregateFunctionsSingleValue<Data>;

public:
    explicit AggregateFunctionsSingleValueMax(const DataTypePtr & type)
        : Parent(type)
    {
    }

    /// Specializations for native numeric types

    /// Batch update of a single aggregation state from rows [row_begin, row_end).
    /// `if_argument_pos` >= 0 selects the column holding the `if` flags.
    ALWAYS_INLINE inline void addBatchSinglePlace(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        Arena * arena,
        ssize_t if_argument_pos) const override;

    /// Same as addBatchSinglePlace, with an additional `null_map` describing which rows are NULL.
    ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        const UInt8 * __restrict null_map,
        Arena * arena,
        ssize_t if_argument_pos) const override;
};

/// Explicit specializations of addBatchSinglePlace for `max` over the types listed by
/// FOR_BASIC_NUMERIC_TYPES. The batch maximum of rows [row_begin, row_end) is computed by
/// findNumericMax, or by findNumericMaxIf when an `if` flag column is present
/// (if_argument_pos >= 0), and is merged into the state only when the helper returned a value.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
    template <> \
    void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlace( \
        size_t row_begin, \
        size_t row_end, \
        AggregateDataPtr __restrict place, \
        const IColumn ** __restrict columns, \
        Arena *, \
        ssize_t if_argument_pos) const \
    { \
        using ColumnType = DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>::ColVecType; \
        const auto * values = assert_cast<const ColumnType &>(*columns[0]).getData().data(); \
        std::optional<TYPE> batch_max; \
        if (if_argument_pos < 0) \
        { \
            batch_max = findNumericMax(values, row_begin, row_end); \
        } \
        else \
        { \
            const auto * flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
            batch_max = findNumericMaxIf(values, flags, row_begin, row_end); \
        } \
        if (batch_max) \
            this->data(place).changeIfGreater(*batch_max); \
    }
// NOLINTEND(bugprone-macro-parentheses)

FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE

/// Generic definition used by every Data that has no explicit specialization:
/// the work is delegated unchanged to the base class implementation.
template <typename Data>
void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    Arena * arena,
    ssize_t if_argument_pos) const
{
    Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
}

/// Explicit specializations of addBatchSinglePlaceNotNull for `max` over the types listed by
/// FOR_BASIC_NUMERIC_TYPES. Without an `if` column the null map is handed directly to
/// findNumericMaxNotNull. With an `if` column (if_argument_pos >= 0) a temporary flag array is
/// built whose entry is 1 exactly when the row is not NULL and its `if` flag is non-zero, and
/// that array is passed to findNumericMaxIf.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
    template <> \
    void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlaceNotNull( \
        size_t row_begin, \
        size_t row_end, \
        AggregateDataPtr __restrict place, \
        const IColumn ** __restrict columns, \
        const UInt8 * __restrict null_map, \
        Arena *, \
        ssize_t if_argument_pos) const \
    { \
        using ColumnType = DB::AggregateFunctionMaxData<SingleValueDataFixed<TYPE>>::ColVecType; \
        const auto * values = assert_cast<const ColumnType &>(*columns[0]).getData().data(); \
        std::optional<TYPE> batch_max; \
        if (if_argument_pos < 0) \
        { \
            batch_max = findNumericMaxNotNull(values, null_map, row_begin, row_end); \
        } \
        else \
        { \
            const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
            /* Indexed by absolute row number, hence sized by row_end. */ \
            auto combined_flags = std::make_unique<UInt8[]>(row_end); \
            for (size_t row = row_begin; row < row_end; ++row) \
                combined_flags[row] = (null_map[row] == 0) & (if_flags[row] != 0); \
            batch_max = findNumericMaxIf(values, combined_flags.get(), row_begin, row_end); \
        } \
        if (batch_max) \
            this->data(place).changeIfGreater(*batch_max); \
    }
// NOLINTEND(bugprone-macro-parentheses)

FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE

/// Generic definition used by every Data that has no explicit specialization:
/// the work is delegated unchanged to the base class implementation.
template <typename Data>
void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    const UInt8 * __restrict null_map,
    Arena * arena,
    ssize_t if_argument_pos) const
{
    Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
}

/// Factory callback for the `max` aggregate function.
/// Forwards name, argument types, parameters and settings to createAggregateFunctionSingleValue
/// and wraps the result in an AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionMax(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    /// Instantiate AggregateFunctionsSingleValueMax (not the plain AggregateFunctionsSingleValue)
    /// so that the batch specializations for native numeric types are actually reachable.
    return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValueMax, AggregateFunctionMaxData>(
        name, argument_types, parameters, settings));
}

AggregateFunctionPtr createAggregateFunctionArgMax(
Expand Down
118 changes: 116 additions & 2 deletions src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -1,6 +1,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <AggregateFunctions/findNumeric.h>


namespace DB
Expand All @@ -10,10 +11,123 @@ struct Settings;
namespace
{

/// `min` flavour of AggregateFunctionsSingleValue.
/// The class only re-declares the two batch entry points so that they can be
/// explicitly specialized outside the class for native numeric types; the generic
/// out-of-class definitions simply forward to Parent.
template <typename Data>
class AggregateFunctionsSingleValueMin final : public AggregateFunctionsSingleValue<Data>
{
    using Parent = AggregateFunctionsSingleValue<Data>;

public:
    explicit AggregateFunctionsSingleValueMin(const DataTypePtr & type)
        : Parent(type)
    {
    }

    /// Specializations for native numeric types

    /// Batch update of a single aggregation state from rows [row_begin, row_end).
    /// `if_argument_pos` >= 0 selects the column holding the `if` flags.
    ALWAYS_INLINE inline void addBatchSinglePlace(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        Arena * arena,
        ssize_t if_argument_pos) const override;

    /// Same as addBatchSinglePlace, with an additional `null_map` describing which rows are NULL.
    ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
        size_t row_begin,
        size_t row_end,
        AggregateDataPtr __restrict place,
        const IColumn ** __restrict columns,
        const UInt8 * __restrict null_map,
        Arena * arena,
        ssize_t if_argument_pos) const override;
};

/// Explicit specializations of addBatchSinglePlace for `min` over the types listed by
/// FOR_BASIC_NUMERIC_TYPES. The batch minimum of rows [row_begin, row_end) is computed by
/// findNumericMin, or by findNumericMinIf when an `if` flag column is present
/// (if_argument_pos >= 0), and is merged into the state only when the helper returned a value.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
    template <> \
    void AggregateFunctionsSingleValueMin<typename DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlace( \
        size_t row_begin, \
        size_t row_end, \
        AggregateDataPtr __restrict place, \
        const IColumn ** __restrict columns, \
        Arena *, \
        ssize_t if_argument_pos) const \
    { \
        using ColumnType = DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>::ColVecType; \
        const auto * values = assert_cast<const ColumnType &>(*columns[0]).getData().data(); \
        std::optional<TYPE> batch_min; \
        if (if_argument_pos < 0) \
        { \
            batch_min = findNumericMin(values, row_begin, row_end); \
        } \
        else \
        { \
            const auto * flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
            batch_min = findNumericMinIf(values, flags, row_begin, row_end); \
        } \
        if (batch_min) \
            this->data(place).changeIfLess(*batch_min); \
    }
// NOLINTEND(bugprone-macro-parentheses)

FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE

/// Generic definition used by every Data that has no explicit specialization:
/// the work is delegated unchanged to the base class implementation.
template <typename Data>
void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    Arena * arena,
    ssize_t if_argument_pos) const
{
    Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
}

/// Explicit specializations of addBatchSinglePlaceNotNull for `min` over the types listed by
/// FOR_BASIC_NUMERIC_TYPES. Without an `if` column the null map is handed directly to
/// findNumericMinNotNull. With an `if` column (if_argument_pos >= 0) a temporary flag array is
/// built whose entry is 1 exactly when the row is not NULL and its `if` flag is non-zero, and
/// that array is passed to findNumericMinIf.
// NOLINTBEGIN(bugprone-macro-parentheses)
#define SPECIALIZE(TYPE) \
    template <> \
    void AggregateFunctionsSingleValueMin<typename DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>>::addBatchSinglePlaceNotNull( \
        size_t row_begin, \
        size_t row_end, \
        AggregateDataPtr __restrict place, \
        const IColumn ** __restrict columns, \
        const UInt8 * __restrict null_map, \
        Arena *, \
        ssize_t if_argument_pos) const \
    { \
        using ColumnType = DB::AggregateFunctionMinData<SingleValueDataFixed<TYPE>>::ColVecType; \
        const auto * values = assert_cast<const ColumnType &>(*columns[0]).getData().data(); \
        std::optional<TYPE> batch_min; \
        if (if_argument_pos < 0) \
        { \
            batch_min = findNumericMinNotNull(values, null_map, row_begin, row_end); \
        } \
        else \
        { \
            const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data(); \
            /* Indexed by absolute row number, hence sized by row_end. */ \
            auto combined_flags = std::make_unique<UInt8[]>(row_end); \
            for (size_t row = row_begin; row < row_end; ++row) \
                combined_flags[row] = (null_map[row] == 0) & (if_flags[row] != 0); \
            batch_min = findNumericMinIf(values, combined_flags.get(), row_begin, row_end); \
        } \
        if (batch_min) \
            this->data(place).changeIfLess(*batch_min); \
    }
// NOLINTEND(bugprone-macro-parentheses)

FOR_BASIC_NUMERIC_TYPES(SPECIALIZE)
#undef SPECIALIZE

/// Generic definition used by every Data that has no explicit specialization:
/// the work is delegated unchanged to the base class implementation.
template <typename Data>
void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** __restrict columns,
    const UInt8 * __restrict null_map,
    Arena * arena,
    ssize_t if_argument_pos) const
{
    Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
}

/// Factory callback for the `min` aggregate function.
/// Forwards name, argument types, parameters and settings to createAggregateFunctionSingleValue
/// and wraps the result in an AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionMin(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    /// Instantiate AggregateFunctionsSingleValueMin (not the plain AggregateFunctionsSingleValue)
    /// so that the batch specializations for native numeric types are actually reachable.
    return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValueMin, AggregateFunctionMinData>(
        name, argument_types, parameters, settings));
}

AggregateFunctionPtr createAggregateFunctionArgMin(
Expand Down
30 changes: 24 additions & 6 deletions src/AggregateFunctions/AggregateFunctionMinMaxAny.h
Expand Up @@ -43,14 +43,12 @@ namespace ErrorCodes
template <typename T>
struct SingleValueDataFixed
{
private:
using Self = SingleValueDataFixed;
using ColVecType = ColumnVectorOrDecimal<T>;

bool has_value = false; /// We need to remember if at least one value has been passed. This is necessary for AggregateFunctionIf.
T value = T{};

public:
static constexpr bool result_is_nullable = false;
static constexpr bool should_skip_null_arguments = true;
static constexpr bool is_any = false;
Expand Down Expand Up @@ -157,6 +155,15 @@ struct SingleValueDataFixed
return false;
}

/// Stores `from` when no value is held yet or when `from` is strictly less than the held value;
/// otherwise leaves the state untouched.
void changeIfLess(T from)
{
    const bool keep_current = has() && !(from < value);
    if (keep_current)
        return;

    value = from;
    has_value = true;
}

bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
{
if (!has() || assert_cast<const ColVecType &>(column).getData()[row_num] > value)
Expand All @@ -179,6 +186,15 @@ struct SingleValueDataFixed
return false;
}

/// Stores `from` when no value is held yet or when `from` is strictly greater than the held value;
/// otherwise leaves the state untouched.
/// The candidate is taken by value, matching changeIfLess(T), so that temporaries and const
/// values are accepted as well as mutable lvalues.
void changeIfGreater(T from)
{
    if (!has() || from > value)
    {
        has_value = true;
        value = from;
    }
}

bool isEqualTo(const Self & to) const
{
return has() && to.value == value;
Expand Down Expand Up @@ -448,7 +464,6 @@ struct SingleValueDataFixed
}

#endif

};

struct Compatibility
Expand Down Expand Up @@ -1214,7 +1229,7 @@ struct AggregateFunctionAnyHeavyData : Data


template <typename Data>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>
class AggregateFunctionsSingleValue : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>
{
static constexpr bool is_any = Data::is_any;

Expand All @@ -1230,8 +1245,11 @@ class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<
|| StringRef(Data::name()) == StringRef("max"))
{
if (!type->isComparable())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of aggregate function {} "
"because the values of that data type are not comparable", type->getName(), getName());
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of aggregate function {} because the values of that data type are not comparable",
type->getName(),
Data::name());
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/AggregateFunctions/AggregateFunctionSum.h
Expand Up @@ -504,7 +504,7 @@ class AggregateFunctionSum final : public IAggregateFunctionDataHelper<Data, Agg
const auto * if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData().data();
auto final_flags = std::make_unique<UInt8[]>(row_end);
for (size_t i = row_begin; i < row_end; ++i)
final_flags[i] = (!null_map[i]) & if_flags[i];
final_flags[i] = (!null_map[i]) & !!if_flags[i];

this->data(place).addManyConditional(column.getData().data(), final_flags.get(), row_begin, row_end);
}
Expand Down
15 changes: 15 additions & 0 deletions src/AggregateFunctions/findNumeric.cpp
@@ -0,0 +1,15 @@
#include <AggregateFunctions/findNumeric.h>

namespace DB
{
/// Explicit instantiation definitions of the six findNumeric{Min,Max}[NotNull|If] templates,
/// emitted once per type listed by FOR_BASIC_NUMERIC_TYPES.
#define INSTANTIATION(T) \
    template std::optional<T> findNumericMin( \
        const T * __restrict ptr, size_t start, size_t end); \
    template std::optional<T> findNumericMinNotNull( \
        const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
    template std::optional<T> findNumericMinIf( \
        const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
    template std::optional<T> findNumericMax( \
        const T * __restrict ptr, size_t start, size_t end); \
    template std::optional<T> findNumericMaxNotNull( \
        const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
    template std::optional<T> findNumericMaxIf( \
        const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);

FOR_BASIC_NUMERIC_TYPES(INSTANTIATION)
#undef INSTANTIATION
}