Skip to content

Commit

Permalink
Merge pull request #3968 from amosbird/writeint
Browse files Browse the repository at this point in the history
Better writeInt
  • Loading branch information
alexey-milovidov committed Jan 2, 2019
2 parents 9ed4fce + df5b735 commit 2d53e51
Show file tree
Hide file tree
Showing 4 changed files with 471 additions and 201 deletions.
5 changes: 3 additions & 2 deletions dbms/src/Dictionaries/TrieDictionary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <Poco/ByteOrder.h>
#include <Poco/Net/IPAddress.h>
#include <Common/formatIPv6.h>
#include <common/itoa.h>
#include <ext/map.h>
#include <ext/range.h>
#include "DictionaryBlockInputStream.h"
Expand Down Expand Up @@ -778,8 +779,8 @@ BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_nam
char * ptr = buffer;
formatIPv6(reinterpret_cast<const unsigned char *>(ip_column.getDataAt(row).data), ptr);
*(ptr - 1) = '/';
auto size = detail::writeUIntText(mask, ptr);
column->insertData(buffer, size + (ptr - buffer));
ptr = itoa(mask, ptr);
column->insertData(buffer, ptr - buffer);
}
return ColumnsWithTypeAndName{
ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), attributes.front().name)};
Expand Down
210 changes: 11 additions & 199 deletions dbms/src/IO/WriteIntText.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,221 +2,33 @@

#include <limits>
#include <type_traits>

#include <common/likely.h>

#include <Core/Types.h>
#include <IO/WriteBuffer.h>
#include <common/itoa.h>

/// 20 digits or 19 digits and a sign
#define WRITE_HELPERS_MAX_INT_WIDTH 20U

/// 40 digits or 39 digits and a sign
#define WRITE_HELPERS_MAX_INT_WIDTH 40U

namespace DB
{

namespace detail
{
/** See:
* https://github.com/localvoid/cxx-benchmark-itoa
* http://www.slideshare.net/andreialexandrescu1/three-optimization-tips-for-c-15708507
* http://vimeo.com/55639112
*/


/// The usual way, if there is not enough space in the buffer for any number to fit there.
template <typename T>
void writeUIntTextFallback(T x, WriteBuffer & buf)
void NO_INLINE writeUIntTextFallback(T x, WriteBuffer & buf)
{
if (x == 0)
{
buf.nextIfAtEnd();
*buf.position() = '0';
++buf.position();

return;
}

char tmp[WRITE_HELPERS_MAX_INT_WIDTH];

char * pos;
for (pos = tmp + WRITE_HELPERS_MAX_INT_WIDTH - 1; x != 0; --pos)
{
*pos = '0' + x % 10;
x /= 10;
}

++pos;

buf.write(pos, tmp + WRITE_HELPERS_MAX_INT_WIDTH - pos);
int len = itoa(x, tmp) - tmp;
buf.write(tmp, len);
}


/** Count the number of decimal digits in the number.
* Works well for nonuniform distribution of numbers, which usually happens.
* If most of the numbers are long, then a "branchless" code with a `bsr` instruction and a smart conversion would work better.
*/
template <typename T>
UInt32 digits10(T x)
{
if (x < 10ULL)
return 1;
if (x < 100ULL)
return 2;
if (x < 1000ULL)
return 3;

if (x < 1000000000000ULL)
{
if (x < 100000000ULL)
{
if (x < 1000000ULL)
{
if (x < 10000ULL)
return 4;
else
return 5 + (x >= 100000ULL);
}

return 7 + (x >= 10000000ULL);
}

if (x < 10000000000ULL)
return 9 + (x >= 1000000000ULL);

return 11 + (x >= 100000000000ULL);
}

return 12 + digits10(x / 1000000000000ULL);
}


/** Converts two digits per iteration.
* Works well for the nonuniform distribution of numbers, which usually happens.
* If most of the numbers are long, and if you do care about the cache, then the variant
* with a large table and four digits per iteration.
*/
template <typename T>
UInt32 writeUIntText(T x, char * dst)
{
static const char digits[201] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";

const UInt32 length = digits10(x);
UInt32 next = length - 1;

while (x >= 100)
{
const auto i = (x % 100) * 2;
x /= 100;
dst[next] = digits[i + 1];
dst[next - 1] = digits[i];
next -= 2;
}

if (x < 10)
{
dst[next] = '0' + x;
}
else
{
const auto i = x * 2;
dst[next] = digits[i + 1];
dst[next - 1] = digits[i];
}

return length;
}


/** If there is enough space in the buffer - calls an optimized version, otherwise - the normal version.
*/
template <typename T>
void writeUIntText(T x, WriteBuffer & buf)
{
if (likely(buf.position() + WRITE_HELPERS_MAX_INT_WIDTH < buf.buffer().end()))
buf.position() += writeUIntText(x, buf.position());
else
writeUIntTextFallback(x, buf);
}


inline void writeLeadingMinus(WriteBuffer & buf)
{
buf.nextIfAtEnd();
*buf.position() = '-';
++buf.position();
}

/** Wrapper for signed numbers.
*/
template <typename T>
void writeSIntText(T x, WriteBuffer & buf)
{
/// A special case for the smallest negative number
if (unlikely(x == std::numeric_limits<T>::min()))
{
if (sizeof(x) == 1)
buf.write("-128", 4);
else if (sizeof(x) == 2)
buf.write("-32768", 6);
else if (sizeof(x) == 4)
buf.write("-2147483648", 11);
else
buf.write("-9223372036854775808", 20);
return;
}

if (x < 0)
{
x = -x;
writeLeadingMinus(buf);
}

writeUIntText(static_cast<std::make_unsigned_t<T>>(x), buf);
}

inline void writeSIntText(__int128 x, WriteBuffer & buf)
{
static const __int128 min_int128 = __int128(0x8000000000000000ll) << 64;

if (unlikely(x == min_int128))
{
buf.write("-170141183460469231731687303715884105728", 40);
return;
}

if (x < 0)
{
x = -x;
writeLeadingMinus(buf);
}

writeUIntText(static_cast<unsigned __int128>(x), buf);
}
}


template <typename T>
std::enable_if_t<std::is_signed_v<T> || std::is_same_v<T, Int128>, void> writeIntText(T x, WriteBuffer & buf)
{
detail::writeSIntText(x, buf);

}

template <typename T>
std::enable_if_t<std::is_unsigned_v<T>, void> writeIntText(T x, WriteBuffer & buf)
void writeIntText(T x, WriteBuffer & buf)
{
detail::writeUIntText(x, buf);
if (likely(buf.position() + WRITE_HELPERS_MAX_INT_WIDTH < buf.buffer().end()))
buf.position() = itoa(x, buf.position());
else
detail::writeUIntTextFallback(x, buf);
}

}
107 changes: 107 additions & 0 deletions dbms/tests/performance/int_parsing/int_parsing.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<test>
<name>int_parsing</name>
<type>loop</type>

<stop_conditions>
<all_of>
<iterations>3</iterations>
<min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
</all_of>
<any_of>
<iterations>5</iterations>
<total_time_ms>60000</total_time_ms>
</any_of>
</stop_conditions>

<main_metric>
<min_time/>
</main_metric>

<preconditions>
<table_exists>test.hits</table_exists>
</preconditions>

<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WatchID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(JavaEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(GoodEvent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CounterID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ClientIP)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RegionID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(UserID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CounterClass)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(OS)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(UserAgent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Refresh)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsRobot)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionWidth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionHeight)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionDepth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FlashMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FlashMinor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(NetMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(NetMinor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(UserAgentMajor)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CookieEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(JavascriptEnable)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsMobile)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(MobilePhone)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IPNetworkID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(TraficSourceID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SearchEngineID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SearchPhrase)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(AdvEngineID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsArtifical)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowClientWidth)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowClientHeight)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ClientTimeZone)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion1)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion2)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion3)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion4)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CodeVersion)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsLink)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsDownload)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsNotBounce)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FUniqID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(HID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsOldCounter)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsEvent)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(IsParameter)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DontCountHits)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WithHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Age)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Sex)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Income)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Interests)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(Robotness)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RemoteIP)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowName)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(OpenerName)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(HistoryLength)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(HTTPError)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SendTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DNSTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ConnectTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResponseStartTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ResponseEndTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FetchTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RedirectTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMInteractiveTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMContentLoadedTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMCompleteTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(LoadEventStartTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(LoadEventEndTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(NSToDOMContentLoadedTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(FirstPaintTiming)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RedirectCount)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(SocialSourceNetworkID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ParamPrice)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(ParamCurrencyID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(HasGCLID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RefererHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(URLHash)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(CLID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(YCLID)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RequestNum)) SETTINGS max_threads = 1</query>
<query>SELECT count() FROM test.hits WHERE NOT ignore(toString(RequestTry)) SETTINGS max_threads = 1</query>
</test>
Loading

0 comments on commit 2d53e51

Please sign in to comment.