Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show halves of checksums in correct order #51040

Merged
merged 5 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 19 additions & 0 deletions base/base/wide_integer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ using FromDoubleIntermediateType = long double;
using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended;
#endif

namespace CityHash_v1_0_2 { struct uint128; }

namespace wide
{

Expand Down Expand Up @@ -281,6 +283,17 @@ struct integer<Bits, Signed>::_impl
}
}

template <typename CityHashUInt128 = CityHash_v1_0_2::uint128>
constexpr static void wide_integer_from_cityhash_uint128(integer<Bits, Signed> & self, const CityHashUInt128 & value) noexcept
{
static_assert(sizeof(item_count) >= 2);

if constexpr (std::endian::native == std::endian::little)
wide_integer_from_tuple_like(self, std::make_pair(value.low64, value.high64));
else
wide_integer_from_tuple_like(self, std::make_pair(value.high64, value.low64));
}

/**
* N.B. t is constructed from double, so max(t) = max(double) ~ 2^310
* the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them.
Expand Down Expand Up @@ -1036,6 +1049,8 @@ constexpr integer<Bits, Signed>::integer(T rhs) noexcept
_impl::wide_integer_from_wide_integer(*this, rhs);
else if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, rhs);
else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>)
_impl::wide_integer_from_cityhash_uint128(*this, rhs);
else
_impl::wide_integer_from_builtin(*this, rhs);
}
Expand All @@ -1051,6 +1066,8 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
_impl::wide_integer_from_wide_integer(*this, *il.begin());
else if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, *il.begin());
else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>)
_impl::wide_integer_from_cityhash_uint128(*this, *il.begin());
else
_impl::wide_integer_from_builtin(*this, *il.begin());
}
Expand Down Expand Up @@ -1088,6 +1105,8 @@ constexpr integer<Bits, Signed> & integer<Bits, Signed>::operator=(T rhs) noexce
{
if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, rhs);
else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>)
_impl::wide_integer_from_cityhash_uint128(*this, rhs);
else
_impl::wide_integer_from_builtin(*this, rhs);
return *this;
Expand Down
19 changes: 16 additions & 3 deletions contrib/cityhash102/include/city.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,24 @@ namespace CityHash_v1_0_2
typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;
typedef std::pair<uint64, uint64> uint128;

/// Represent an unsigned integer of 128 bits as it's used in CityHash.
/// Originally CityHash used `std::pair<uint64, uint64>` instead of this struct,
/// however the members `first` and `second` could be easily confused so they were renamed to `low64` and `high64`:
/// `first` -> `low64`, `second` -> `high64`.
struct uint128
{
uint64 low64 = 0;
uint64 high64 = 0;

uint128() = default;
uint128(uint64 low64_, uint64 high64_) : low64(low64_), high64(high64_) {}
friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); }
friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); }
};

inline uint64 Uint128Low64(const uint128& x) { return x.first; }
inline uint64 Uint128High64(const uint128& x) { return x.second; }
inline uint64 Uint128Low64(const uint128 & x) { return x.low64; }
inline uint64 Uint128High64(const uint128 & x) { return x.high64; }

// Hash function for a byte array.
uint64 CityHash64(const char *buf, size_t len);
Expand Down
16 changes: 8 additions & 8 deletions src/Compression/CompressedReadBufferBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c

/// TODO mess up of endianness in error message.
message << "Checksum doesn't match: corrupted data."
" Reference: " + getHexUIntLowercase(expected_checksum.first) + getHexUIntLowercase(expected_checksum.second)
+ ". Actual: " + getHexUIntLowercase(calculated_checksum.first) + getHexUIntLowercase(calculated_checksum.second)
" Reference: " + getHexUIntLowercase(expected_checksum.high64) + getHexUIntLowercase(expected_checksum.low64)
+ ". Actual: " + getHexUIntLowercase(calculated_checksum.high64) + getHexUIntLowercase(calculated_checksum.low64)
+ ". Size of compressed block: " + toString(size);

const char * message_hardware_failure = "This is most likely due to hardware failure. "
Expand Down Expand Up @@ -95,8 +95,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
}

/// Check if the difference caused by single bit flip in stored checksum.
size_t difference = std::popcount(expected_checksum.first ^ calculated_checksum.first)
+ std::popcount(expected_checksum.second ^ calculated_checksum.second);
size_t difference = std::popcount(expected_checksum.low64 ^ calculated_checksum.low64)
+ std::popcount(expected_checksum.high64 ^ calculated_checksum.high64);

if (difference == 1)
{
Expand Down Expand Up @@ -194,8 +194,8 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
{
Checksum checksum;
ReadBufferFromMemory checksum_in(own_compressed_buffer.data(), sizeof(checksum));
readBinaryLittleEndian(checksum.first, checksum_in);
readBinaryLittleEndian(checksum.second, checksum_in);
readBinaryLittleEndian(checksum.low64, checksum_in);
readBinaryLittleEndian(checksum.high64, checksum_in);

validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
}
Expand Down Expand Up @@ -238,8 +238,8 @@ size_t CompressedReadBufferBase::readCompressedDataBlockForAsynchronous(size_t &
{
Checksum checksum;
ReadBufferFromMemory checksum_in(own_compressed_buffer.data(), sizeof(checksum));
readBinaryLittleEndian(checksum.first, checksum_in);
readBinaryLittleEndian(checksum.second, checksum_in);
readBinaryLittleEndian(checksum.low64, checksum_in);
readBinaryLittleEndian(checksum.high64, checksum_in);

validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
}
Expand Down
8 changes: 4 additions & 4 deletions src/Compression/CompressedWriteBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ void CompressedWriteBuffer::nextImpl()

CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(out_compressed_ptr, compressed_size);

writeBinaryLittleEndian(checksum.first, out);
writeBinaryLittleEndian(checksum.second, out);
writeBinaryLittleEndian(checksum.low64, out);
writeBinaryLittleEndian(checksum.high64, out);

out.position() += compressed_size;
}
Expand All @@ -50,8 +50,8 @@ void CompressedWriteBuffer::nextImpl()

CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size);

writeBinaryLittleEndian(checksum.first, out);
writeBinaryLittleEndian(checksum.second, out);
writeBinaryLittleEndian(checksum.low64, out);
writeBinaryLittleEndian(checksum.high64, out);

out.write(compressed_buffer.data(), compressed_size);
}
Expand Down
4 changes: 2 additions & 2 deletions src/Storages/Distributed/DistributedAsyncInsertHeader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi
{
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH,
"Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.",
getHexUIntLowercase(expected_checksum.first), getHexUIntLowercase(expected_checksum.second),
getHexUIntLowercase(calculated_checksum.first), getHexUIntLowercase(calculated_checksum.second));
getHexUIntLowercase(expected_checksum.high64), getHexUIntLowercase(expected_checksum.low64),
getHexUIntLowercase(calculated_checksum.high64), getHexUIntLowercase(calculated_checksum.low64));
}

/// Read the parts of the header.
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ void DataPartStorageOnDiskBase::backup(
if (it != checksums.files.end())
{
file_size = it->second.file_size;
file_hash = {it->second.file_hash.first, it->second.file_hash.second};
file_hash = it->second.file_hash;
}

BackupEntryPtr backup_entry = std::make_unique<BackupEntryFromImmutableFile>(disk, filepath_on_disk, copy_encrypted, file_size, file_hash);
Expand Down
8 changes: 4 additions & 4 deletions src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,19 @@ bool MergeTreeDataPartChecksums::readV2(ReadBuffer & in)
assertString("\n\tsize: ", in);
readText(sum.file_size, in);
assertString("\n\thash: ", in);
readText(sum.file_hash.first, in);
readText(sum.file_hash.low64, in);
assertString(" ", in);
readText(sum.file_hash.second, in);
readText(sum.file_hash.high64, in);
assertString("\n\tcompressed: ", in);
readText(sum.is_compressed, in);
if (sum.is_compressed)
{
assertString("\n\tuncompressed size: ", in);
readText(sum.uncompressed_size, in);
assertString("\n\tuncompressed hash: ", in);
readText(sum.uncompressed_hash.first, in);
readText(sum.uncompressed_hash.low64, in);
assertString(" ", in);
readText(sum.uncompressed_hash.second, in);
readText(sum.uncompressed_hash.high64, in);
}
assertChar('\n', in);

Expand Down
8 changes: 4 additions & 4 deletions src/Storages/MergeTree/PartMetadataManagerWithCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,8 @@ std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWit
ErrorCodes::CORRUPTED_DATA,
"Checksums doesn't match in part {} for {}. Expected: {}. Found {}.",
part->name, file_path,
getHexUIntUppercase(disk_checksum.first) + getHexUIntUppercase(disk_checksum.second),
getHexUIntUppercase(cache_checksums[i].first) + getHexUIntUppercase(cache_checksums[i].second));
getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64),
getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64));

disk_checksums.push_back(disk_checksum);
continue;
Expand Down Expand Up @@ -287,8 +287,8 @@ std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWit
ErrorCodes::CORRUPTED_DATA,
"Checksums doesn't match in projection part {} {}. Expected: {}. Found {}.",
part->name, proj_name,
getHexUIntUppercase(disk_checksum.first) + getHexUIntUppercase(disk_checksum.second),
getHexUIntUppercase(cache_checksums[i].first) + getHexUIntUppercase(cache_checksums[i].second));
getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64),
getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64));
disk_checksums.push_back(disk_checksum);
}
return results;
Expand Down
6 changes: 3 additions & 3 deletions src/Storages/System/StorageSystemParts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,17 +252,17 @@ void StorageSystemParts::processNextStorage(
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_all_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_uncompressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
if (columns_mask[src_index++])
{
auto checksum = helper.uncompressed_hash_of_compressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/Storages/System/StorageSystemProjectionParts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,17 +221,17 @@ void StorageSystemProjectionParts::processNextStorage(
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_all_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_uncompressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
if (columns_mask[src_index++])
{
auto checksum = helper.uncompressed_hash_of_compressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
20000101_1_1_0 test_00961 b5fce9c4ef1ca42ce4ed027389c208d2 fc3b062b646cd23d4c23d7f5920f89ae da96ff1e527a8a1f908ddf2b1d0af239
20000101_1_1_0 test_00961 e4ed027389c208d2b5fce9c4ef1ca42c 4c23d7f5920f89aefc3b062b646cd23d 908ddf2b1d0af239da96ff1e527a8a1f
2 changes: 1 addition & 1 deletion utils/checksum-for-compressed-block/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ int main(int, char **)
{
auto flipped = flipBit(str, pos);
auto checksum = CityHash_v1_0_2::CityHash128(flipped.data(), flipped.size());
std::cout << getHexUIntLowercase(checksum.first) << getHexUIntLowercase(checksum.second) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
std::cout << getHexUIntLowercase(checksum.high64) << getHexUIntLowercase(checksum.low64) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
}

return 0;
Expand Down