Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using "ryu" library to format floats in text form. #8542

Merged
merged 17 commits into from Jan 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Expand Up @@ -134,3 +134,6 @@
[submodule "contrib/libc-headers"]
path = contrib/libc-headers
url = https://github.com/ClickHouse-Extras/libc-headers.git
[submodule "contrib/ryu"]
path = contrib/ryu
url = https://github.com/ClickHouse-Extras/ryu.git
2 changes: 2 additions & 0 deletions contrib/CMakeLists.txt
Expand Up @@ -32,6 +32,8 @@ if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
add_subdirectory (double-conversion-cmake)
endif ()

add_subdirectory (ryu-cmake)

if (USE_INTERNAL_CITYHASH_LIBRARY)
add_subdirectory (cityhash102)
endif ()
Expand Down
1 change: 1 addition & 0 deletions contrib/ryu
Submodule ryu added at 5b4a85
10 changes: 10 additions & 0 deletions contrib/ryu-cmake/CMakeLists.txt
@@ -0,0 +1,10 @@
# Builds the bundled Ryu library (float/double to text conversion) from the
# sources located under contrib/ryu.
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/ryu)

set (SRCS
    ${LIBRARY_DIR}/ryu/d2fixed.c
    ${LIBRARY_DIR}/ryu/d2s.c
    ${LIBRARY_DIR}/ryu/f2s.c
    ${LIBRARY_DIR}/ryu/generic_128.c
)

add_library (ryu ${SRCS})

# The include root is the library directory itself, so consumers write
# #include <ryu/ryu.h>. SYSTEM keeps warnings from these headers out of the build.
target_include_directories (ryu SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}")
1 change: 1 addition & 0 deletions dbms/CMakeLists.txt
Expand Up @@ -330,6 +330,7 @@ target_link_libraries (clickhouse_common_io
${LINK_LIBRARIES_ONLY_ON_X86_64}
PUBLIC
${DOUBLE_CONVERSION_LIBRARIES}
ryu
PUBLIC
${Poco_Net_LIBRARY}
${Poco_Util_LIBRARY}
Expand Down
122 changes: 101 additions & 21 deletions dbms/src/IO/WriteHelpers.h
Expand Up @@ -27,8 +27,11 @@
#include <IO/DoubleConverter.h>
#include <IO/WriteBufferFromString.h>

#include <ryu/ryu.h>

#include <Formats/FormatSettings.h>


namespace DB
{

Expand Down Expand Up @@ -114,21 +117,108 @@ inline void writeBoolText(bool x, WriteBuffer & buf)
writeChar(x ? '1' : '0', buf);
}


/// Bit-level view of a double: the value is copied into a 64-bit integer and
/// split into sign, biased exponent and fraction ("mantissa") fields.
/// Assumes `double` uses the IEEE 754 binary64 layout
/// (1 sign bit, 11 exponent bits, 52 fraction bits).
struct DecomposedFloat64
{
    /// Captures the raw bit pattern of x (memcpy avoids aliasing problems).
    DecomposedFloat64(double x)
    {
        memcpy(&x_uint, &x, sizeof(x));
    }

    /// Raw bit pattern of the source value.
    uint64_t x_uint;

    /// True when the sign bit is set (negative values, including -0.0).
    bool sign() const
    {
        return x_uint >> 63;
    }

    /// Biased exponent field (11 bits, 0..2047).
    uint16_t exponent() const
    {
        return (x_uint >> 52) & 0x7FF;
    }

    /// Exponent with the bias of 1023 removed.
    int16_t normalized_exponent() const
    {
        return int16_t(exponent()) - 1023;
    }

    /// Fraction field: exactly the low 52 bits, without the implicit leading one.
    /// The mask must not cover any exponent bit, otherwise the exponent leaks
    /// into the returned value.
    uint64_t mantissa() const
    {
        return x_uint & ((1ULL << 52) - 1);
    }

    /// Returns true when the value is +0.0 or has no fractional part and a
    /// magnitude below 2^53, so that converting it to Int64 is exact.
    /// The check is conservative: larger integral doubles, -0.0, infinities
    /// and NaNs all yield false.
    /// NOTE Probably floating point instructions can be better.
    bool is_inside_int64() const
    {
        return x_uint == 0
            || (normalized_exponent() >= 0 && normalized_exponent() <= 52
                /// All fraction bits below the binary point must be zero.
                && ((mantissa() & ((1ULL << (52 - normalized_exponent())) - 1)) == 0));
    }
};

/// Bit-level view of a float: the value is copied into a 32-bit integer and
/// split into sign, biased exponent and fraction ("mantissa") fields.
/// Assumes `float` uses the IEEE 754 binary32 layout
/// (1 sign bit, 8 exponent bits, 23 fraction bits).
struct DecomposedFloat32
{
    /// Captures the raw bit pattern of x.
    DecomposedFloat32(float x)
    {
        memcpy(&x_uint, &x, sizeof(x_uint));
    }

    /// Raw bit pattern of the source value.
    uint32_t x_uint;

    /// True when the sign bit is set.
    bool sign() const
    {
        const uint32_t sign_bit = x_uint >> 31;
        return sign_bit != 0;
    }

    /// Biased exponent field (8 bits, 0..255).
    uint16_t exponent() const
    {
        const uint32_t shifted = x_uint >> 23;
        return shifted & 0xFF;
    }

    /// Exponent with the bias of 127 removed.
    int16_t normalized_exponent() const
    {
        const int16_t biased = int16_t(exponent());
        return biased - 127;
    }

    /// Fraction field: the low 23 bits, without the implicit leading one.
    uint32_t mantissa() const
    {
        const uint32_t fraction_mask = 0x7fffff;
        return x_uint & fraction_mask;
    }

    /// Returns true when the value is +0.0 or has no fractional part and a
    /// magnitude below 2^24, so that converting it to Int32 is exact.
    bool is_inside_int32() const
    {
        if (x_uint == 0)
            return true;

        const int16_t power = normalized_exponent();
        if (power < 0 || power > 23)
            return false;

        /// Bits of the fraction that lie below the binary point.
        const uint64_t fractional_bits = (1ULL << (23 - power)) - 1;
        return (mantissa() & fractional_bits) == 0;
    }
};

template <typename T>
inline size_t writeFloatTextFastPath(T x, char * buffer, int len)
inline size_t writeFloatTextFastPath(T x, char * buffer)
{
using Converter = DoubleConverter<false>;
double_conversion::StringBuilder builder{buffer, len};
int result = 0;

bool result = false;
if constexpr (std::is_same_v<T, double>)
result = Converter::instance().ToShortest(x, &builder);
{
/// The library Ryu has low performance on integers.
/// This workaround improves performance 6..10 times.

if (DecomposedFloat64(x).is_inside_int64())
result = itoa(Int64(x), buffer) - buffer;
else
result = d2s_buffered_n(x, buffer);
}
else
result = Converter::instance().ToShortestSingle(x, &builder);
{
if (DecomposedFloat32(x).is_inside_int32())
result = itoa(Int32(x), buffer) - buffer;
else
result = f2s_buffered_n(x, buffer);
}

if (!result)
if (result <= 0)
throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER);
return builder.position();
return result;
}

template <typename T>
Expand All @@ -139,23 +229,13 @@ inline void writeFloatText(T x, WriteBuffer & buf)
using Converter = DoubleConverter<false>;
if (likely(buf.available() >= Converter::MAX_REPRESENTATION_LENGTH))
{
buf.position() += writeFloatTextFastPath(x, buf.position(), Converter::MAX_REPRESENTATION_LENGTH);
buf.position() += writeFloatTextFastPath(x, buf.position());
return;
}

Converter::BufferType buffer;
double_conversion::StringBuilder builder{buffer, sizeof(buffer)};

bool result = false;
if constexpr (std::is_same_v<T, double>)
result = Converter::instance().ToShortest(x, &builder);
else
result = Converter::instance().ToShortestSingle(x, &builder);

if (!result)
throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER);

buf.write(buffer, builder.position());
size_t result = writeFloatTextFastPath(x, buffer);
buf.write(buffer, result);
}


Expand Down
3 changes: 3 additions & 0 deletions dbms/src/IO/tests/CMakeLists.txt
Expand Up @@ -79,3 +79,6 @@ target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io)

add_executable (zlib_ng_bug zlib_ng_bug.cpp)
target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY} ${ZLIB_LIBRARY})

add_executable (ryu_test ryu_test.cpp)
target_link_libraries (ryu_test PRIVATE ryu)
15 changes: 15 additions & 0 deletions dbms/src/IO/tests/ryu_test.cpp
@@ -0,0 +1,15 @@
#include <string>
#include <iostream>
#include <ryu/ryu.h>


/// Manual check of Ryu output: formats a double with d2s_buffered and prints it.
/// The number is taken from the first command-line argument; 0 is used when
/// no argument is given.
int main(int argc, char ** argv)
{
    double value = 0;
    if (argc > 1)
        value = std::stod(argv[1]);

    /// The buffer is streamed as a C string, so this relies on d2s_buffered
    /// leaving a NUL-terminated result that fits in 32 bytes.
    char text[32];
    d2s_buffered(value, text);

    std::cout << text << "\n";
    return 0;
}
20 changes: 20 additions & 0 deletions dbms/tests/performance/float_formatting.xml
Expand Up @@ -26,13 +26,33 @@
<value>rand() / 0xFFFFFFFF</value>
<value>0xFFFFFFFF / rand()</value>
<value>toFloat64(number)</value>
<value>toFloat64(number % 2)</value>
<value>toFloat64(number % 10)</value>
<value>toFloat64(number % 100)</value>
<value>toFloat64(number % 1000)</value>
<value>toFloat64(number % 10000)</value>
<value>toFloat64(number % 100 + 0.5)</value>
<value>toFloat64(number % 100 + 0.123)</value>
<value>toFloat64(number % 1000 + 0.123456)</value>
<value>number / 2</value>
<value>number / 3</value>
<value>number / 7</value>
<value>number / 16</value>
<value>toFloat64(rand())</value>
<value>toFloat64(rand64())</value>
<value>toFloat32(number)</value>
<value>toFloat32(number % 2)</value>
<value>toFloat32(number % 10)</value>
<value>toFloat32(number % 100)</value>
<value>toFloat32(number % 1000)</value>
<value>toFloat32(number % 10000)</value>
<value>toFloat32(number % 100 + 0.5)</value>
<value>toFloat32(number % 100 + 0.123)</value>
<value>toFloat32(number % 1000 + 0.123456)</value>
<value>toFloat32(rand())</value>
<value>toFloat32(rand64())</value>
<value>reinterpretAsFloat32(reinterpretAsString(rand()))</value>
<value>reinterpretAsFloat64(reinterpretAsString(rand64()))</value>
</values>
</substitution>
</substitutions>
Expand Down