Add support for extended precision integers and decimals (#13097)

ClickHouse · Aug 19, 2020 · becc186 · becc186
1 parent 3b8ac01
commit becc186
Show file tree

Hide file tree

Showing 116 changed files with 2,770 additions and 614 deletions.
diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <common/types.h>
+
 namespace common
 {
     template <typename T>
@@ -35,6 +37,21 @@ namespace common
         return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y);
     }
 
+    /// Overflow-checked addition for the signed 256-bit type: stores x + y in res and
+    /// returns true when the exact sum lies outside the std::numeric_limits<bInt256> range.
+    template <>
+    inline bool addOverflow(bInt256 x, bInt256 y, bInt256 & res)
+    {
+        res = x + y;
+
+        /// Only a positive addend can exceed max(), only a negative one can go below min().
+        if (y > 0)
+            return x > std::numeric_limits<bInt256>::max() - y;
+        if (y < 0)
+            return x < std::numeric_limits<bInt256>::min() - y;
+        return false;
+    }
+
+    /// Overflow-checked addition for the unsigned 256-bit type: stores x + y in res and
+    /// returns true when the exact sum exceeds std::numeric_limits<bUInt256>::max().
+    template <>
+    inline bool addOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
+    {
+        /// How much can still be added to x before passing max().
+        const bUInt256 headroom = std::numeric_limits<bUInt256>::max() - x;
+        res = x + y;
+        return y > headroom;
+    }
+
     template <typename T>
     inline bool subOverflow(T x, T y, T & res)
     {
@@ -68,6 +85,21 @@ namespace common
         return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y);
     }
 
+    /// Overflow-checked subtraction for the signed 256-bit type: stores x - y in res and
+    /// returns true when the exact difference lies outside the std::numeric_limits<bInt256> range.
+    template <>
+    inline bool subOverflow(bInt256 x, bInt256 y, bInt256 & res)
+    {
+        res = x - y;
+
+        /// Subtracting a negative value can exceed max(); subtracting a positive one can go below min().
+        if (y < 0)
+            return x > std::numeric_limits<bInt256>::max() + y;
+        if (y > 0)
+            return x < std::numeric_limits<bInt256>::min() + y;
+        return false;
+    }
+
+    /// Overflow-checked subtraction for the unsigned 256-bit type: stores x - y in res and
+    /// returns true when y is larger than x (the exact difference would be negative).
+    template <>
+    inline bool subOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
+    {
+        const bool borrows = y > x;
+        res = x - y;
+        return borrows;
+    }
+
     template <typename T>
     inline bool mulOverflow(T x, T y, T & res)
     {
@@ -103,4 +135,25 @@ namespace common
         unsigned __int128 b = (y > 0) ? y : -y;
         return (a * b) / b != a;
     }
+
+    /// Overflow-checked multiplication for the signed 256-bit type: stores x * y in res and
+    /// returns true when multiplying the magnitudes does not survive a division round-trip.
+    template <>
+    inline bool mulOverflow(bInt256 x, bInt256 y, bInt256 & res)
+    {
+        res = x * y;
+        if (x == 0 || y == 0)
+            return false;
+
+        /// Work on magnitudes so the check does not depend on the operand signs.
+        const bInt256 abs_x = (x < 0) ? -x : x;
+        const bInt256 abs_y = (y < 0) ? -y : y;
+        const bInt256 product = abs_x * abs_y;
+        return product / abs_y != abs_x;
+    }
+
+    /// Overflow-checked multiplication for the unsigned 256-bit type: stores x * y in res and
+    /// returns true when dividing the stored product by y does not give back x.
+    /// A zero operand can never overflow and is handled first (it also avoids dividing by zero).
+    template <>
+    inline bool mulOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
+    {
+        res = x * y;
+        if (!x || !y)
+            return false;
+        /// res already holds the product; reuse it rather than doing a second 256-bit multiplication.
+        return res / y != x;
+    }
 }
diff --git a/base/common/types.h b/base/common/types.h
@@ -6,6 +6,8 @@
 #include <string>
 #include <type_traits>
 
+#include <boost/multiprecision/cpp_int.hpp>
+
 using Int8 = int8_t;
 using Int16 = int16_t;
 using Int32 = int32_t;
@@ -15,11 +17,21 @@ using Int64 = int64_t;
 using char8_t = unsigned char;
 #endif
 
+/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713
 using UInt8 = char8_t;
 using UInt16 = uint16_t;
 using UInt32 = uint32_t;
 using UInt64 = uint64_t;
 
+/// 128-bit signed integer, an alias for the compiler-provided __int128.
+using Int128 = __int128;
+
+/// We have to use 127 and 255 bit integers to save a bit for sign serialization.
+/// bInt256 is therefore declared explicitly with 255 bits, signed_magnitude representation and
+/// unchecked arithmetic, instead of using the stock boost int256_t (kept below for reference).
+//using bInt256 = boost::multiprecision::int256_t;
+using bInt256 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<
+    255, 255, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void> >;
+using bUInt256 = boost::multiprecision::uint256_t;
+
+
 using String = std::string;
 
 /// The standard library type traits, such as std::is_arithmetic, with one exception
@@ -31,6 +43,9 @@ struct is_signed
     static constexpr bool value = std::is_signed_v<T>;
 };
 
+/// The generic is_signed defers to std::is_signed_v, so the extended-precision signed types are listed explicitly.
+template <> struct is_signed<Int128> { static constexpr bool value = true; };
+template <> struct is_signed<bInt256> { static constexpr bool value = true; };
+
 template <typename T>
 inline constexpr bool is_signed_v = is_signed<T>::value;
 
@@ -40,23 +55,69 @@ struct is_unsigned
     static constexpr bool value = std::is_unsigned_v<T>;
 };
 
+/// The generic is_unsigned defers to std::is_unsigned_v, so the 256-bit unsigned type is listed explicitly.
+template <> struct is_unsigned<bUInt256> { static constexpr bool value = true; };
+
 template <typename T>
 inline constexpr bool is_unsigned_v = is_unsigned<T>::value;
 
+
+/// TODO: is_integral includes char, char8_t and wchar_t.
+/// Integer trait (renamed from is_integral in this change). The generic case defers to
+/// std::is_integral_v<T>; Int128, bInt256 and bUInt256 are enabled by the specializations below.
 template <typename T>
-struct is_integral
+struct is_integer
 {
     static constexpr bool value = std::is_integral_v<T>;
 };
 
+template <> struct is_integer<Int128> { static constexpr bool value = true; };
+template <> struct is_integer<bInt256> { static constexpr bool value = true; };
+template <> struct is_integer<bUInt256> { static constexpr bool value = true; };
+
 template <typename T>
-inline constexpr bool is_integral_v = is_integral<T>::value;
+inline constexpr bool is_integer_v = is_integer<T>::value;
+
 
+/// Arithmetic trait. The generic case defers to std::is_arithmetic_v<T>;
+/// __int128 is additionally marked arithmetic by the specialization below.
 template <typename T>
 struct is_arithmetic
 {
     static constexpr bool value = std::is_arithmetic_v<T>;
 };
 
+template <> struct is_arithmetic<__int128> { static constexpr bool value = true; };
+
 template <typename T>
 inline constexpr bool is_arithmetic_v = is_arithmetic<T>::value;
+
+/// Maps a type to its unsigned counterpart. The generic case forwards to std::make_unsigned_t;
+/// __int128 and the 256-bit types are mapped by the explicit specializations below.
+template <typename T>
+struct make_unsigned
+{
+    using type = std::make_unsigned_t<T>;
+};
+
+template <> struct make_unsigned<__int128> { using type = unsigned __int128; };
+template <> struct make_unsigned<bInt256>  { using type = bUInt256; };
+template <> struct make_unsigned<bUInt256> { using type = bUInt256; };
+
+template <typename T>
+using make_unsigned_t = typename make_unsigned<T>::type;
+
+/// Maps a type to its signed counterpart. The generic case forwards to std::make_signed_t;
+/// both 256-bit types map to bInt256 through the explicit specializations below.
+template <typename T>
+struct make_signed
+{
+    using type = std::make_signed_t<T>;
+};
+
+template <> struct make_signed<bInt256>  { using type = bInt256; };
+template <> struct make_signed<bUInt256> { using type = bInt256; };
+
+template <typename T>
+using make_signed_t = typename make_signed<T>::type;
+
+/// Marks the 256-bit integer types (bInt256, bUInt256). False for every other type;
+/// in particular there is no specialization for Int128 here.
+template <typename T>
+struct is_big_int
+{
+    static constexpr bool value = false;
+};
+
+template <> struct is_big_int<bUInt256> { static constexpr bool value = true; };
+template <> struct is_big_int<bInt256> { static constexpr bool value = true; };
+
+template <typename T>
+inline constexpr bool is_big_int_v = is_big_int<T>::value;
diff --git a/src/AggregateFunctions/AggregateFunctionAvg.cpp b/src/AggregateFunctions/AggregateFunctionAvg.cpp
@@ -17,7 +17,10 @@ namespace
+/// Selects the accumulator type and the concrete aggregate function used for avg over T.
 template <typename T>
 struct Avg
 {
-    using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
+    /// Decimal256 accumulates in Decimal256, every other decimal in Decimal128,
+    /// non-decimal types in NearestFieldType<T>.
+    using FieldType = std::conditional_t<IsDecimalNumber<T>,
+                                        std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
+                                        NearestFieldType<T>>;
+    /// Before this change every decimal type used Decimal128 here.
     using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType, UInt64>>;
 };
 

diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h
@@ -22,19 +22,28 @@ struct AggregateFunctionAvgData
     using NumeratorType = T;
     using DenominatorType = Denominator;
 
-    T numerator = 0;
-    Denominator denominator = 0;
+    T numerator{0};
+    Denominator denominator{0};
 
+    /// Returns numerator / denominator converted to ResultT.
+    /// For an IEC 559 floating-point ResultT the division is done in floating point and a zero
+    /// denominator is allowed; otherwise a zero denominator yields ResultT(0).
     template <typename ResultT>
     ResultT NO_SANITIZE_UNDEFINED result() const
     {
         if constexpr (std::is_floating_point_v<ResultT>)
             if constexpr (std::numeric_limits<ResultT>::is_iec559)
-                return static_cast<ResultT>(numerator) / denominator; /// allow division by zero
-
-        if (denominator == 0)
+            {
+                /// A 256-bit denominator is converted to ResultT first so both operands have the same type.
+                if constexpr (is_big_int_v<Denominator>)
+                    return static_cast<ResultT>(numerator) / static_cast<ResultT>(denominator);
+                else
+                    return static_cast<ResultT>(numerator) / denominator; /// allow division by zero
+            }
+
+        if (denominator == static_cast<Denominator>(0))
             return static_cast<ResultT>(0);
-        return static_cast<ResultT>(numerator / denominator);
+
+        /// For Decimal256 the denominator is converted to T before dividing.
+        if constexpr (std::is_same_v<T, Decimal256>)
+            return static_cast<ResultT>(numerator / static_cast<T>(denominator));
+        else
+            return static_cast<ResultT>(numerator / denominator);
     }
 };
 

diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp
@@ -17,7 +17,10 @@ namespace
+/// Selects the accumulator type and the concrete aggregate function used for avgWeighted over T.
 template <typename T>
 struct AvgWeighted
 {
-    using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
+    /// Decimal256 accumulates in Decimal256, every other decimal in Decimal128,
+    /// non-decimal types in NearestFieldType<T>.
+    using FieldType = std::conditional_t<IsDecimalNumber<T>,
+                                         std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
+                                         NearestFieldType<T>>;
+    /// Before this change every decimal type used Decimal128 here.
     using Function = AggregateFunctionAvgWeighted<T, AggregateFunctionAvgData<FieldType, FieldType>>;
 };
 

diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h
@@ -295,7 +295,12 @@ class GroupArrayNumericImpl final
         if (size)
         {
             typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
-            data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
+            if constexpr (is_big_int_v<T>)
+                // is data_to empty? we should probably use std::vector::insert then
+                for (auto it = this->data(place).value.begin(); it != this->data(place).value.end(); it++)
+                    data_to.push_back(*it);
+            else
+                data_to.insert(this->data(place).value.begin(), this->data(place).value.end());
         }
     }
 

diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp
@@ -37,12 +37,12 @@ struct MovingAvg
 template <typename T, typename LimitNumberOfElements> using MovingSumTemplate = typename MovingSum<T, LimitNumberOfElements>::Function;
 template <typename T, typename LimitNumberOfElements> using MovingAvgTemplate = typename MovingAvg<T, LimitNumberOfElements>::Function;
 
-template <template <typename, typename> class Function, typename HasLimit, typename ... TArgs>
+template <template <typename, typename> class Function, typename HasLimit, typename DecimalArg, typename ... TArgs>
 inline AggregateFunctionPtr createAggregateFunctionMovingImpl(const std::string & name, const DataTypePtr & argument_type, TArgs ... args)
 {
     AggregateFunctionPtr res;
 
-    if (isDecimal(argument_type))
+    if constexpr (DecimalArg::value)
         res.reset(createWithDecimalType<Function, HasLimit>(*argument_type, argument_type, std::forward<TArgs>(args)...));
     else
         res.reset(createWithNumericType<Function, HasLimit>(*argument_type, argument_type, std::forward<TArgs>(args)...));
@@ -84,10 +84,21 @@ AggregateFunctionPtr createAggregateFunctionMoving(const std::string & name, con
         throw Exception("Incorrect number of parameters for aggregate function " + name + ", should be 0 or 1",
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
+    const DataTypePtr & argument_type = argument_types[0];
     if (!limit_size)
-        return createAggregateFunctionMovingImpl<Function, std::false_type>(name, argument_types[0]);
+    {
+        if (isDecimal(argument_type))
+            return createAggregateFunctionMovingImpl<Function, std::false_type, std::true_type>(name, argument_type);
+        else
+            return createAggregateFunctionMovingImpl<Function, std::false_type, std::false_type>(name, argument_type);
+    }
     else
-        return createAggregateFunctionMovingImpl<Function, std::true_type>(name, argument_types[0], max_elems);
+    {
+        if (isDecimal(argument_type))
+            return createAggregateFunctionMovingImpl<Function, std::true_type, std::true_type>(name, argument_type, max_elems);
+        else
+            return createAggregateFunctionMovingImpl<Function, std::true_type, std::false_type>(name, argument_type, max_elems);
+    }
 }
 
 }

diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h
@@ -117,7 +117,7 @@ class MovingImpl final
+    /// Reads row row_num of the first argument column and adds it, converted to ResultT,
+    /// to the aggregation state stored at place.
     void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
-        this->data(place).add(value, arena);
+        this->data(place).add(static_cast<ResultT>(value), arena);
     }
 
     void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override

diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/src/AggregateFunctions/AggregateFunctionQuantile.cpp
@@ -59,6 +59,16 @@ static constexpr bool supportDecimal()
         std::is_same_v<Function<Float32, false>, FuncQuantilesExactWeighted<Float32, false>>;
 }
 
+/// Compile-time predicate: true when Function is one of the quantile families listed below.
+/// createAggregateFunctionQuantile uses it to decide whether to dispatch Int128 / bInt256 / bUInt256.
+template <template <typename, bool> class Function>
+static constexpr bool supportBigInt()
+{
+    /// Templates cannot be compared directly, so compare one fixed instantiation of each.
+    using Probe = Function<Float32, false>;
+
+    return std::is_same_v<Probe, FuncQuantile<Float32, false>>
+        || std::is_same_v<Probe, FuncQuantiles<Float32, false>>
+        || std::is_same_v<Probe, FuncQuantileExact<Float32, false>>
+        || std::is_same_v<Probe, FuncQuantilesExact<Float32, false>>
+        || std::is_same_v<Probe, FuncQuantileExactWeighted<Float32, false>>
+        || std::is_same_v<Probe, FuncQuantilesExactWeighted<Float32, false>>;
+}
 
 template <template <typename, bool> class Function>
 AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, const DataTypes & argument_types, const Array & params)
@@ -71,7 +81,7 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c
 
 #define DISPATCH(TYPE) \
     if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
-    FOR_NUMERIC_TYPES(DISPATCH)
+    FOR_BASIC_NUMERIC_TYPES(DISPATCH)
 #undef DISPATCH
     if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
     if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
@@ -81,6 +91,14 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c
         if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
         if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
         if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
+        //if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
+    }
+
+    if constexpr (supportBigInt<Function>())
+    {
+        if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
+        if (which.idx == TypeIndex::bInt256) return std::make_shared<Function<bInt256, true>>(argument_types, params);
+        if (which.idx == TypeIndex::bUInt256) return std::make_shared<Function<bUInt256, true>>(argument_types, params);
     }
 
     throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,

diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h
@@ -433,11 +433,20 @@ class AggregateFunctionVarianceSimple final
     {
         if constexpr (StatFunc::num_args == 2)
             this->data(place).add(
-                static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num],
-                static_cast<const ColVecT2 &>(*columns[1]).getData()[row_num]);
+                static_cast<ResultType>(static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num]),
+                static_cast<ResultType>(static_cast<const ColVecT2 &>(*columns[1]).getData()[row_num]));
         else
-            this->data(place).add(
-                static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num]);
+        {
+            if constexpr (std::is_same_v<T1, Decimal256>)
+            {
+                this->data(place).add(static_cast<ResultType>(
+                    static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num].value
+                ));
+            }
+            else
+                this->data(place).add(
+                    static_cast<ResultType>(static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num]));
+        }
     }
 
     void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override

diff --git a/src/AggregateFunctions/AggregateFunctionSum.cpp b/src/AggregateFunctions/AggregateFunctionSum.cpp
@@ -19,7 +19,10 @@ template <typename T>
 struct SumSimple
 {
     /// @note It uses slow Decimal128 (cause we need such a variant). sumWithOverflow is faster for Decimal32/64
-    using ResultType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
+    /// Decimal256 is summed in Decimal256, every other decimal in Decimal128,
+    /// non-decimal types in NearestFieldType<T>.
+    using ResultType = std::conditional_t<IsDecimalNumber<T>,
+                                        std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
+                                        NearestFieldType<T>>;
+    /// Before this change every decimal type used Decimal128 here.
     using AggregateDataType = AggregateFunctionSumData<ResultType>;
     using Function = AggregateFunctionSum<T, ResultType, AggregateDataType, AggregateFunctionTypeSum>;
 };