Skip to content

Commit

Permalink
Add support for extended precision integers and decimals (#13097)
Browse files Browse the repository at this point in the history
  • Loading branch information
4ertus2 committed Aug 19, 2020
1 parent 3b8ac01 commit becc186
Show file tree
Hide file tree
Showing 116 changed files with 2,770 additions and 614 deletions.
53 changes: 53 additions & 0 deletions base/common/arithmeticOverflow.h
@@ -1,5 +1,7 @@
#pragma once

#include <common/types.h>

namespace common
{
template <typename T>
Expand Down Expand Up @@ -35,6 +37,21 @@ namespace common
return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y);
}

/// Signed 256-bit addition: stores x + y in res and returns true when the exact sum
/// lies outside [numeric_limits<bInt256>::min(), numeric_limits<bInt256>::max()].
template <>
inline bool addOverflow(bInt256 x, bInt256 y, bInt256 & res)
{
    res = x + y;

    /// Each bound is shifted by y towards zero, so the comparison itself stays in range.
    if (y > 0)
        return x > std::numeric_limits<bInt256>::max() - y;
    if (y < 0)
        return x < std::numeric_limits<bInt256>::min() - y;

    /// Adding zero never overflows.
    return false;
}

/// Unsigned 256-bit addition: stores x + y in res and returns true when the exact sum
/// is greater than numeric_limits<bUInt256>::max().
template <>
inline bool addOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
{
    res = x + y;

    /// Largest value that can still be added to y without exceeding the maximum.
    const bUInt256 headroom = std::numeric_limits<bUInt256>::max() - y;
    return x > headroom;
}

template <typename T>
inline bool subOverflow(T x, T y, T & res)
{
Expand Down Expand Up @@ -68,6 +85,21 @@ namespace common
return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y);
}

/// Signed 256-bit subtraction: stores x - y in res and returns true when the exact difference
/// lies outside [numeric_limits<bInt256>::min(), numeric_limits<bInt256>::max()].
template <>
inline bool subOverflow(bInt256 x, bInt256 y, bInt256 & res)
{
    res = x - y;

    /// Each bound is shifted by y towards zero, so the comparison itself stays in range.
    if (y < 0)
        return x > std::numeric_limits<bInt256>::max() + y;
    if (y > 0)
        return x < std::numeric_limits<bInt256>::min() + y;

    /// Subtracting zero never overflows.
    return false;
}

/// Unsigned 256-bit subtraction: stores x - y in res and returns true when the exact
/// difference would be negative, i.e. when the subtrahend is larger than the minuend.
template <>
inline bool subOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
{
    res = x - y;
    return y > x;
}

template <typename T>
inline bool mulOverflow(T x, T y, T & res)
{
Expand Down Expand Up @@ -103,4 +135,25 @@ namespace common
unsigned __int128 b = (y > 0) ? y : -y;
return (a * b) / b != a;
}

/// Signed 256-bit multiplication: stores x * y in res and returns true on overflow.
/// Overflow is reported when dividing the product of the magnitudes by one magnitude
/// does not give back the other.
template <>
inline bool mulOverflow(bInt256 x, bInt256 y, bInt256 & res)
{
    res = x * y;

    /// A zero factor never overflows; returning here also keeps the division below away from zero.
    if (!x || !y)
        return false;

    /// Magnitudes of both factors.
    bInt256 abs_x = x;
    if (!(x > 0))
        abs_x = -x;

    bInt256 abs_y = y;
    if (!(y > 0))
        abs_y = -y;

    const bInt256 product = abs_x * abs_y;
    return product / abs_y != abs_x;
}

/// Unsigned 256-bit multiplication: stores x * y in res and returns true on overflow.
/// Overflow is reported when dividing the stored product by y does not give back x.
template <>
inline bool mulOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
{
    res = x * y;

    /// A zero factor never overflows; returning here also keeps the division below away from zero.
    if (!x || !y)
        return false;

    /// res already holds x * y, so reuse it instead of doing a second 256-bit multiplication.
    return res / y != x;
}
}
65 changes: 63 additions & 2 deletions base/common/types.h
Expand Up @@ -6,6 +6,8 @@
#include <string>
#include <type_traits>

#include <boost/multiprecision/cpp_int.hpp>

using Int8 = int8_t;
using Int16 = int16_t;
using Int32 = int32_t;
Expand All @@ -15,11 +17,21 @@ using Int64 = int64_t;
using char8_t = unsigned char;
#endif

/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713
using UInt8 = char8_t;
using UInt16 = uint16_t;
using UInt32 = uint32_t;
using UInt64 = uint64_t;

using Int128 = __int128;

/// We have to use 127 and 255 bit integers to save a bit for sign serialization
//using bInt256 = boost::multiprecision::int256_t;
using bInt256 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<
255, 255, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void> >;
using bUInt256 = boost::multiprecision::uint256_t;


using String = std::string;

/// The standard library type traits, such as std::is_arithmetic, with one exception
Expand All @@ -31,6 +43,9 @@ struct is_signed
static constexpr bool value = std::is_signed_v<T>;
};

template <> struct is_signed<Int128> { static constexpr bool value = true; };
template <> struct is_signed<bInt256> { static constexpr bool value = true; };

template <typename T>
inline constexpr bool is_signed_v = is_signed<T>::value;

Expand All @@ -40,23 +55,69 @@ struct is_unsigned
static constexpr bool value = std::is_unsigned_v<T>;
};

template <> struct is_unsigned<bUInt256> { static constexpr bool value = true; };

template <typename T>
inline constexpr bool is_unsigned_v = is_unsigned<T>::value;


/// TODO: is_integral includes char, char8_t and wchar_t.
template <typename T>
struct is_integral
struct is_integer
{
static constexpr bool value = std::is_integral_v<T>;
};

template <> struct is_integer<Int128> { static constexpr bool value = true; };
template <> struct is_integer<bInt256> { static constexpr bool value = true; };
template <> struct is_integer<bUInt256> { static constexpr bool value = true; };

template <typename T>
inline constexpr bool is_integral_v = is_integral<T>::value;
inline constexpr bool is_integer_v = is_integer<T>::value;


/// Project-level counterpart of std::is_arithmetic. The primary template forwards to the
/// standard trait; explicit specializations may mark further types as arithmetic.
template <typename T>
struct is_arithmetic
{
    static constexpr bool value = std::is_arithmetic<T>::value;
};

template <> struct is_arithmetic<__int128> { static constexpr bool value = true; };

template <typename T>
inline constexpr bool is_arithmetic_v = is_arithmetic<T>::value;

/// Project-level counterpart of std::make_unsigned. The primary template forwards to the
/// standard trait; explicit specializations may map further types.
template <typename T>
struct make_unsigned
{
    using type = typename std::make_unsigned<T>::type;
};

template <> struct make_unsigned<__int128> { using type = unsigned __int128; };
template <> struct make_unsigned<bInt256> { using type = bUInt256; };
template <> struct make_unsigned<bUInt256> { using type = bUInt256; };

template <typename T> using make_unsigned_t = typename make_unsigned<T>::type;

/// Project-level counterpart of std::make_signed. The primary template forwards to the
/// standard trait; explicit specializations may map further types.
template <typename T>
struct make_signed
{
    using type = typename std::make_signed<T>::type;
};

template <> struct make_signed<bInt256> { typedef bInt256 type; };
template <> struct make_signed<bUInt256> { typedef bInt256 type; };

template <typename T> using make_signed_t = typename make_signed<T>::type;

/// Trait that is false for every type unless an explicit specialization says otherwise.
template <typename T>
struct is_big_int
{
    static constexpr bool value{false};
};

template <> struct is_big_int<bUInt256> { static constexpr bool value = true; };
template <> struct is_big_int<bInt256> { static constexpr bool value = true; };

template <typename T>
inline constexpr bool is_big_int_v = is_big_int<T>::value;
5 changes: 4 additions & 1 deletion src/AggregateFunctions/AggregateFunctionAvg.cpp
Expand Up @@ -17,7 +17,10 @@ namespace
template <typename T>
struct Avg
{
using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using FieldType = std::conditional_t<IsDecimalNumber<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
NearestFieldType<T>>;
// using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType, UInt64>>;
};

Expand Down
21 changes: 15 additions & 6 deletions src/AggregateFunctions/AggregateFunctionAvg.h
Expand Up @@ -22,19 +22,28 @@ struct AggregateFunctionAvgData
using NumeratorType = T;
using DenominatorType = Denominator;

T numerator = 0;
Denominator denominator = 0;
T numerator{0};
Denominator denominator{0};

template <typename ResultT>
ResultT NO_SANITIZE_UNDEFINED result() const
{
if constexpr (std::is_floating_point_v<ResultT>)
if constexpr (std::numeric_limits<ResultT>::is_iec559)
return static_cast<ResultT>(numerator) / denominator; /// allow division by zero

if (denominator == 0)
{
if constexpr (is_big_int_v<Denominator>)
return static_cast<ResultT>(numerator) / static_cast<ResultT>(denominator);
else
return static_cast<ResultT>(numerator) / denominator; /// allow division by zero
}

if (denominator == static_cast<Denominator>(0))
return static_cast<ResultT>(0);
return static_cast<ResultT>(numerator / denominator);

if constexpr (std::is_same_v<T, Decimal256>)
return static_cast<ResultT>(numerator / static_cast<T>(denominator));
else
return static_cast<ResultT>(numerator / denominator);
}
};

Expand Down
5 changes: 4 additions & 1 deletion src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp
Expand Up @@ -17,7 +17,10 @@ namespace
template <typename T>
struct AvgWeighted
{
using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using FieldType = std::conditional_t<IsDecimalNumber<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
NearestFieldType<T>>;
// using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using Function = AggregateFunctionAvgWeighted<T, AggregateFunctionAvgData<FieldType, FieldType>>;
};

Expand Down
7 changes: 6 additions & 1 deletion src/AggregateFunctions/AggregateFunctionGroupArray.h
Expand Up @@ -295,7 +295,12 @@ class GroupArrayNumericImpl final
if (size)
{
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
if constexpr (is_big_int_v<T>)
// Is data_to empty? If so, we should probably use std::vector::insert here instead.
for (auto it = this->data(place).value.begin(); it != this->data(place).value.end(); it++)
data_to.push_back(*it);
else
data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
}
}

Expand Down
19 changes: 15 additions & 4 deletions src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp
Expand Up @@ -37,12 +37,12 @@ struct MovingAvg
template <typename T, typename LimitNumberOfElements> using MovingSumTemplate = typename MovingSum<T, LimitNumberOfElements>::Function;
template <typename T, typename LimitNumberOfElements> using MovingAvgTemplate = typename MovingAvg<T, LimitNumberOfElements>::Function;

template <template <typename, typename> class Function, typename HasLimit, typename ... TArgs>
template <template <typename, typename> class Function, typename HasLimit, typename DecimalArg, typename ... TArgs>
inline AggregateFunctionPtr createAggregateFunctionMovingImpl(const std::string & name, const DataTypePtr & argument_type, TArgs ... args)
{
AggregateFunctionPtr res;

if (isDecimal(argument_type))
if constexpr (DecimalArg::value)
res.reset(createWithDecimalType<Function, HasLimit>(*argument_type, argument_type, std::forward<TArgs>(args)...));
else
res.reset(createWithNumericType<Function, HasLimit>(*argument_type, argument_type, std::forward<TArgs>(args)...));
Expand Down Expand Up @@ -84,10 +84,21 @@ AggregateFunctionPtr createAggregateFunctionMoving(const std::string & name, con
throw Exception("Incorrect number of parameters for aggregate function " + name + ", should be 0 or 1",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

const DataTypePtr & argument_type = argument_types[0];
if (!limit_size)
return createAggregateFunctionMovingImpl<Function, std::false_type>(name, argument_types[0]);
{
if (isDecimal(argument_type))
return createAggregateFunctionMovingImpl<Function, std::false_type, std::true_type>(name, argument_type);
else
return createAggregateFunctionMovingImpl<Function, std::false_type, std::false_type>(name, argument_type);
}
else
return createAggregateFunctionMovingImpl<Function, std::true_type>(name, argument_types[0], max_elems);
{
if (isDecimal(argument_type))
return createAggregateFunctionMovingImpl<Function, std::true_type, std::true_type>(name, argument_type, max_elems);
else
return createAggregateFunctionMovingImpl<Function, std::true_type, std::false_type>(name, argument_type, max_elems);
}
}

}
Expand Down
2 changes: 1 addition & 1 deletion src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
Expand Up @@ -117,7 +117,7 @@ class MovingImpl final
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
this->data(place).add(value, arena);
this->data(place).add(static_cast<ResultT>(value), arena);
}

void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
Expand Down
20 changes: 19 additions & 1 deletion src/AggregateFunctions/AggregateFunctionQuantile.cpp
Expand Up @@ -59,6 +59,16 @@ static constexpr bool supportDecimal()
std::is_same_v<Function<Float32, false>, FuncQuantilesExactWeighted<Float32, false>>;
}

template <template <typename, bool> class Function>
static constexpr bool supportBigInt()
{
return std::is_same_v<Function<Float32, false>, FuncQuantile<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantiles<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantileExact<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantilesExact<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantileExactWeighted<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantilesExactWeighted<Float32, false>>;
}

template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, const DataTypes & argument_types, const Array & params)
Expand All @@ -71,7 +81,7 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c

#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
FOR_NUMERIC_TYPES(DISPATCH)
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
Expand All @@ -81,6 +91,14 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
//if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
}

if constexpr (supportBigInt<Function>())
{
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
if (which.idx == TypeIndex::bInt256) return std::make_shared<Function<bInt256, true>>(argument_types, params);
if (which.idx == TypeIndex::bUInt256) return std::make_shared<Function<bUInt256, true>>(argument_types, params);
}

throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
Expand Down
17 changes: 13 additions & 4 deletions src/AggregateFunctions/AggregateFunctionStatisticsSimple.h
Expand Up @@ -433,11 +433,20 @@ class AggregateFunctionVarianceSimple final
{
if constexpr (StatFunc::num_args == 2)
this->data(place).add(
static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num],
static_cast<const ColVecT2 &>(*columns[1]).getData()[row_num]);
static_cast<ResultType>(static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num]),
static_cast<ResultType>(static_cast<const ColVecT2 &>(*columns[1]).getData()[row_num]));
else
this->data(place).add(
static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num]);
{
if constexpr (std::is_same_v<T1, Decimal256>)
{
this->data(place).add(static_cast<ResultType>(
static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num].value
));
}
else
this->data(place).add(
static_cast<ResultType>(static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num]));
}
}

void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
Expand Down
5 changes: 4 additions & 1 deletion src/AggregateFunctions/AggregateFunctionSum.cpp
Expand Up @@ -19,7 +19,10 @@ template <typename T>
struct SumSimple
{
/// @note It uses slow Decimal128 (cause we need such a variant). sumWithOverflow is faster for Decimal32/64
using ResultType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using ResultType = std::conditional_t<IsDecimalNumber<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
NearestFieldType<T>>;
// using ResultType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using AggregateDataType = AggregateFunctionSumData<ResultType>;
using Function = AggregateFunctionSum<T, ResultType, AggregateDataType, AggregateFunctionTypeSum>;
};
Expand Down

0 comments on commit becc186

Please sign in to comment.