From a8f3bf2738a0bae9ddd8950d3918d368b328a873 Mon Sep 17 00:00:00 2001 From: SergeyMakeev Date: Mon, 4 Mar 2024 11:13:47 -0800 Subject: [PATCH] refactor out KeyInfo + add wyhash --- .gitignore | 2 + CMakeLists.txt | 3 +- ExcaliburHash/ExcaliburHash.h | 66 +++--------------- ExcaliburHash/ExcaliburKeyInfo.h | 70 +++++++++++++++++++ ExcaliburHash/wyhash.h | 113 +++++++++++++++++++++++++++++++ ExcaliburHashTest01.cpp | 2 +- ExcaliburHashTest02.cpp | 4 +- ExcaliburHashTest03.cpp | 18 +---- ExcaliburHashTest04.cpp | 16 ----- ExcaliburHashTest05.cpp | 16 ----- gen_vs19_x86.cmd | 10 +++ 11 files changed, 211 insertions(+), 109 deletions(-) create mode 100644 ExcaliburHash/ExcaliburKeyInfo.h create mode 100644 ExcaliburHash/wyhash.h create mode 100644 gen_vs19_x86.cmd diff --git a/.gitignore b/.gitignore index 578ab03..356a8ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ build2019 +build2019_32 build2022 +build2022_32 build codecov_report *.log diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f5d856..857a99e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 3.6) # set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE) @@ -23,6 +23,7 @@ set(TEST_SOURCES set (TEST_EXE_NAME ${PROJ_NAME}) add_executable(${TEST_EXE_NAME} ${TEST_SOURCES}) +set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${TEST_EXE_NAME}) set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) add_subdirectory("${PROJECT_SOURCE_DIR}/extern/googletest" "extern/googletest") diff --git a/ExcaliburHash/ExcaliburHash.h b/ExcaliburHash/ExcaliburHash.h index 2a169df..1541fe2 100644 --- a/ExcaliburHash/ExcaliburHash.h +++ b/ExcaliburHash/ExcaliburHash.h @@ -52,53 +52,7 @@ #endif #endif -namespace Excalibur -{ - -// generic type (without implementation) -template struct KeyInfo -{ - // static inline T getTombstone() noexcept; - // static inline T getEmpty() noexcept; - // static 
inline uint64_t hash(const T& key) noexcept; - // static inline bool isEqual(const T& lhs, const T& rhs) noexcept; - // static inline bool isValid(const T& key) noexcept; -}; - -template <> struct KeyInfo -{ - static inline bool isValid(const int& key) noexcept { return key < 0x7ffffffe; } - static inline int getTombstone() noexcept { return 0x7fffffff; } - static inline int getEmpty() noexcept { return 0x7ffffffe; } - static inline uint64_t hash(const int& key) noexcept { return key; } - static inline bool isEqual(const int& lhs, const int& rhs) noexcept { return lhs == rhs; } -}; - -/* -template <> struct KeyInfo -{ - static inline bool isValid(const uint32_t& key) noexcept { return key < 0xfffffffe; } - static inline uint32_t getTombstone() noexcept { return 0xfffffffe; } - static inline uint32_t getEmpty() noexcept { return 0xffffffff; } - static inline uint64_t hash(const uint32_t& key) noexcept { return key; } - static inline bool isEqual(const uint32_t& lhs, const uint32_t& rhs) noexcept { return lhs == rhs; } -}; - - template <> struct KeyInfo -{ - static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); } - static inline std::string getTombstone() noexcept - { - // and let's hope that small string optimization will do the job - return std::string(1, char(1)); - } - static inline std::string getEmpty() noexcept { return std::string(); } - static inline uint64_t hash(const std::string& key) noexcept { return std::hash{}(key); } - static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; } -}; -*/ - -} // namespace Excalibur +#include namespace Excalibur { @@ -416,11 +370,11 @@ template - inline std::pair emplaceToExisting(uint32_t numBuckets, TK&& key, Args&&... args) + inline std::pair emplaceToExisting(size_t numBuckets, TK&& key, Args&&... 
args) { // numBuckets has to be power-of-two EXLBR_ASSERT(numBuckets > 0); EXLBR_ASSERT((numBuckets & (numBuckets - 1)) == 0); - const uint64_t hashValue = TKeyInfo::hash(key); - const uint32_t bucketIndex = uint32_t(hashValue) & (numBuckets - 1); + const size_t hashValue = TKeyInfo::hash(key); + const size_t bucketIndex = hashValue & (numBuckets - 1); TItem* const firstItem = m_storage; TItem* const endItem = firstItem + numBuckets; TItem* EXLBR_RESTRICT currentItem = firstItem + bucketIndex; @@ -715,7 +669,7 @@ template second); // <--- when hash table grows it->second will point to a memory we are about to free - std::pair it = emplaceToExisting(numBucketsNew, key, args...); + std::pair it = emplaceToExisting(size_t(numBucketsNew), key, args...); - reinsert(numBucketsNew, item, enditem); + reinsert(size_t(numBucketsNew), item, enditem); if (!isInlineStorage) { diff --git a/ExcaliburHash/ExcaliburKeyInfo.h b/ExcaliburHash/ExcaliburKeyInfo.h new file mode 100644 index 0000000..3d4101a --- /dev/null +++ b/ExcaliburHash/ExcaliburKeyInfo.h @@ -0,0 +1,70 @@ +#pragma once + +#include + +#include "wyhash.h" + +namespace Excalibur +{ + +// generic type (without implementation) +template struct KeyInfo +{ + // static inline T getTombstone() noexcept; + // static inline T getEmpty() noexcept; + // static inline size_t hash(const T& key) noexcept; + // static inline bool isEqual(const T& lhs, const T& rhs) noexcept; + // static inline bool isValid(const T& key) noexcept; +}; + +template <> struct KeyInfo +{ + static inline bool isValid(const int32_t& key) noexcept { return key < INT32_C(0x7ffffffe); } + static inline int32_t getTombstone() noexcept { return INT32_C(0x7fffffff); } + static inline int32_t getEmpty() noexcept { return INT32_C(0x7ffffffe); } + static inline size_t hash(const int32_t& key) noexcept { return Excalibur::wyhash::hash(key); } + static inline bool isEqual(const int32_t& lhs, const int32_t& rhs) noexcept { return lhs == rhs; } +}; + +template <> 
struct KeyInfo +{ + static inline bool isValid(const uint32_t& key) noexcept { return key < UINT32_C(0xfffffffe); } + static inline uint32_t getTombstone() noexcept {return UINT32_C(0xfffffffe); } + static inline uint32_t getEmpty() noexcept {return UINT32_C(0xffffffff); } + static inline size_t hash(const uint32_t& key) noexcept { return Excalibur::wyhash::hash(key); } + static inline bool isEqual(const uint32_t& lhs, const uint32_t& rhs) noexcept { return lhs == rhs; } +}; + +template <> struct KeyInfo +{ + static inline bool isValid(const int64_t& key) noexcept { return key < INT64_C(0x7ffffffffffffffe); } + static inline int64_t getTombstone() noexcept { return INT64_C(0x7fffffffffffffff); } + static inline int64_t getEmpty() noexcept { return INT64_C(0x7ffffffffffffffe); } + static inline size_t hash(const int64_t& key) noexcept { return Excalibur::wyhash::hash(key); } + static inline bool isEqual(const int64_t& lhs, const int64_t& rhs) noexcept { return lhs == rhs; } +}; + +template <> struct KeyInfo +{ + static inline bool isValid(const uint64_t& key) noexcept { return key < UINT64_C(0xfffffffffffffffe); } + static inline uint64_t getTombstone() noexcept { return UINT64_C(0xfffffffffffffffe); } + static inline uint64_t getEmpty() noexcept { return UINT64_C(0xffffffffffffffff); } + static inline size_t hash(const uint64_t& key) noexcept { return Excalibur::wyhash::hash(key); } + static inline bool isEqual(const uint64_t& lhs, const uint64_t& rhs) noexcept { return lhs == rhs; } +}; + + template <> struct KeyInfo +{ + static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); } + static inline std::string getTombstone() noexcept + { + // and let's hope that small string optimization will do the job + return std::string(1, char(1)); + } + static inline std::string getEmpty() noexcept { return std::string(); } + static inline size_t hash(const std::string& key) noexcept { return std::hash{}(key); } + static 
inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; } +}; + + +} // namespace Excalibur diff --git a/ExcaliburHash/wyhash.h b/ExcaliburHash/wyhash.h new file mode 100644 index 0000000..5d2d070 --- /dev/null +++ b/ExcaliburHash/wyhash.h @@ -0,0 +1,113 @@ +#pragma once + +#include + +#if defined(_MSC_VER) + #define EXLBR_VISUAL_STUDIO (1) + #define EXLBR_FORCE_INLINE __forceinline +#endif + +#if defined(__clang__) + #define EXLBR_CLANG (1) + #define EXLBR_FORCE_INLINE __attribute__((always_inline)) +#endif + +#if defined(__GNUC__) + #define EXLBR_GCC (1) +#endif + +#if defined(_M_X64) || defined(__aarch64__) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || \ + defined(__LP64__) || defined(_WIN64) + #define EXLBR_64 (1) +#else + #define EXLBR_32 (1) +#endif + +#if EXLBR_VISUAL_STUDIO + #include +#endif + +namespace Excalibur +{ + +namespace wyhash +{ + +#if EXLBR_64 + +inline uint64_t _hash64(uint64_t v) +{ + #if EXLBR_VISUAL_STUDIO + { + uint64_t h; + uint64_t l = _umul128(v, UINT64_C(0x9E3779B97F4A7C15), &h); + return l ^ h; + } + #elif EXLBR_GCC || EXLBR_CLANG /* a bare #elif is ill-formed; this branch needs the __uint128_t extension */ + { + __uint128_t r = v; + r *= UINT64_C(0x9E3779B97F4A7C15); + return (uint64_t)(r >> 64U) ^ (uint64_t)(r); + } + #else + { + #error Unsupported compiler or platform + } + #endif +} + +#elif EXLBR_32 + +inline uint32_t _hash32(uint32_t v) +{ + #if EXLBR_VISUAL_STUDIO + { + uint64_t lh = __emulu(v, UINT32_C(0x9e3779b1)); + return (uint32_t)(lh >> 32U) ^ (uint32_t)(lh); + } + #elif EXLBR_GCC || EXLBR_CLANG /* a bare #elif is ill-formed; plain 64-bit multiply, folded to 32 bits */ + { + uint64_t lh = uint64_t(v) * uint64_t(0x9e3779b1); + return (uint32_t)(lh >> 32U) ^ (uint32_t)(lh); + } + #else + { + #error Unsupported compiler or platform + } + #endif +} +#else + #error Unsupported compiler or platform +#endif + +inline size_t hash(uint64_t v) +{ +#if EXLBR_64 + return _hash64(v); +#elif EXLBR_32 + uint32_t vv = (uint32_t)(v >> 32U) ^ (uint32_t)(v); + return _hash32(vv); +#else + #error Unsupported compiler or platform +#endif +} + +inline size_t 
hash(uint32_t v) +{ +#if EXLBR_64 + return _hash64(uint64_t(v)); +#elif EXLBR_32 + return _hash32(v); +#else + #error Unsupported compiler or platform +#endif +} + + +inline size_t hash(int64_t v) { return hash(uint64_t(v)); } +inline size_t hash(int32_t v) { return hash(uint32_t(v)); } + + +} // namespace wyhash + +} // namespace Excalibur \ No newline at end of file diff --git a/ExcaliburHashTest01.cpp b/ExcaliburHashTest01.cpp index b0638d7..ad7de4a 100644 --- a/ExcaliburHashTest01.cpp +++ b/ExcaliburHashTest01.cpp @@ -249,7 +249,7 @@ template <> struct KeyInfo static inline bool isValid(const Bar& key) noexcept { return key.v < 0x7ffffffe; } static inline Bar getTombstone() noexcept { return Bar{0x7fffffff}; } static inline Bar getEmpty() noexcept { return Bar{0x7ffffffe}; } - static inline uint64_t hash(const Bar& key) noexcept { return std::hash{}(key.v); } + static inline size_t hash(const Bar& key) noexcept { return std::hash{}(key.v); } static inline bool isEqual(const Bar& lhs, const Bar& rhs) noexcept { return lhs.v == rhs.v; } }; } // namespace Excalibur diff --git a/ExcaliburHashTest02.cpp b/ExcaliburHashTest02.cpp index 1123811..a536775 100644 --- a/ExcaliburHashTest02.cpp +++ b/ExcaliburHashTest02.cpp @@ -46,7 +46,7 @@ template <> struct KeyInfo static inline bool isValid(const ComplexStruct& key) noexcept { return key.v < 0xfffffffe; } static inline ComplexStruct getTombstone() noexcept { return ComplexStruct{0xfffffffe}; } static inline ComplexStruct getEmpty() noexcept { return ComplexStruct{0xffffffff}; } - static inline uint64_t hash(const ComplexStruct& key) noexcept { return std::hash{}(key.v); } + static inline size_t hash(const ComplexStruct& key) noexcept { return std::hash{}(key.v); } static inline bool isEqual(const ComplexStruct& lhs, const ComplexStruct& rhs) noexcept { return lhs.v == rhs.v; } }; @@ -134,7 +134,7 @@ template <> struct KeyInfo static inline bool isValid(const BadHashStruct& key) noexcept { return key.v < 0x7ffffffe; } 
static inline BadHashStruct getTombstone() noexcept { return BadHashStruct{0x7fffffff}; } static inline BadHashStruct getEmpty() noexcept { return BadHashStruct{0x7ffffffe}; } - static inline uint64_t hash(const BadHashStruct& /*key*/) noexcept + static inline size_t hash(const BadHashStruct& /*key*/) noexcept { // Note: this is a very bad hash function causing 100% collisions // added intentionally for the test diff --git a/ExcaliburHashTest03.cpp b/ExcaliburHashTest03.cpp index d626023..a6cd5ab 100644 --- a/ExcaliburHashTest03.cpp +++ b/ExcaliburHashTest03.cpp @@ -16,7 +16,7 @@ template <> struct KeyInfo static inline bool isValid(const CustomStruct& key) noexcept { return key.v < 0x7ffffffe; } static inline CustomStruct getTombstone() noexcept { return CustomStruct{0x7fffffff}; } static inline CustomStruct getEmpty() noexcept { return CustomStruct{0x7ffffffe}; } - static inline uint64_t hash(const CustomStruct& /*key*/) noexcept + static inline size_t hash(const CustomStruct& /*key*/) noexcept { // Note: this is a very bad hash function causing 100% collisions // added intentionally for the test @@ -92,22 +92,6 @@ TEST(SmFlatHashMap, EmplaceEdgeCase) } } -namespace Excalibur -{ -template <> struct KeyInfo -{ - static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); } - static inline std::string getTombstone() noexcept - { - // and let's hope that small string optimization will do the job - return std::string(1, char(1)); - } - static inline std::string getEmpty() noexcept { return std::string(); } - static inline uint64_t hash(const std::string& key) noexcept { return std::hash{}(key); } - static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; } -}; -} // namespace Excalibur - TEST(SmFlatHashMap, ComplexStruct) { { diff --git a/ExcaliburHashTest04.cpp b/ExcaliburHashTest04.cpp index a9a6f79..77c76c5 100644 --- a/ExcaliburHashTest04.cpp +++ 
b/ExcaliburHashTest04.cpp @@ -3,22 +3,6 @@ #include #include -namespace Excalibur -{ -template <> struct KeyInfo -{ - static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); } - static inline std::string getTombstone() noexcept - { - // and let's hope that small string optimization will do the job - return std::string(1, char(1)); - } - static inline std::string getEmpty() noexcept { return std::string(); } - static inline uint64_t hash(const std::string& key) noexcept { return std::hash{}(key); } - static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; } -}; -} // namespace Excalibur - struct ComplexValue { ComplexValue(const ComplexValue& other) diff --git a/ExcaliburHashTest05.cpp b/ExcaliburHashTest05.cpp index 0e5db7d..2d1b6b9 100644 --- a/ExcaliburHashTest05.cpp +++ b/ExcaliburHashTest05.cpp @@ -3,22 +3,6 @@ #include #include -namespace Excalibur -{ -template <> struct KeyInfo -{ - static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); } - static inline std::string getTombstone() noexcept - { - // and let's hope that small string optimization will do the job - return std::string(1, char(1)); - } - static inline std::string getEmpty() noexcept { return std::string(); } - static inline uint64_t hash(const std::string& key) noexcept { return std::hash{}(key); } - static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; } -}; -} // namespace Excalibur - TEST(SmFlatHashMap, InlineStorageTest01) { // create hash map and insert one element diff --git a/gen_vs19_x86.cmd b/gen_vs19_x86.cmd new file mode 100644 index 0000000..c86f738 --- /dev/null +++ b/gen_vs19_x86.cmd @@ -0,0 +1,10 @@ +@echo off +set builddir=build2019_32 +if not exist %builddir% goto GENERATE +del %builddir% /S /Q +:GENERATE +mkdir %builddir% +cd %builddir% +cmake -G "Visual Studio 16 2019" -A 
Win32 ../ +cd .. +