Skip to content

Commit

Permalink
refactor out KeyInfo + add wyhash
Browse files Browse the repository at this point in the history
  • Loading branch information
SergeyMakeev committed Mar 4, 2024
1 parent 9000354 commit a8f3bf2
Show file tree
Hide file tree
Showing 11 changed files with 211 additions and 109 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
build2019
build2019_32
build2022
build2022_32
build
codecov_report
*.log
Expand Down
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.12)
cmake_minimum_required(VERSION 3.6)

# set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)

Expand All @@ -23,6 +23,7 @@ set(TEST_SOURCES

set (TEST_EXE_NAME ${PROJ_NAME})
add_executable(${TEST_EXE_NAME} ${TEST_SOURCES})
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${TEST_EXE_NAME})

set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
add_subdirectory("${PROJECT_SOURCE_DIR}/extern/googletest" "extern/googletest")
Expand Down
66 changes: 10 additions & 56 deletions ExcaliburHash/ExcaliburHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,53 +52,7 @@
#endif
#endif

namespace Excalibur
{

// generic type (without implementation)
template <typename T> struct KeyInfo
{
// static inline T getTombstone() noexcept;
// static inline T getEmpty() noexcept;
// static inline uint64_t hash(const T& key) noexcept;
// static inline bool isEqual(const T& lhs, const T& rhs) noexcept;
// static inline bool isValid(const T& key) noexcept;
};

template <> struct KeyInfo<int>
{
static inline bool isValid(const int& key) noexcept { return key < 0x7ffffffe; }
static inline int getTombstone() noexcept { return 0x7fffffff; }
static inline int getEmpty() noexcept { return 0x7ffffffe; }
static inline uint64_t hash(const int& key) noexcept { return key; }
static inline bool isEqual(const int& lhs, const int& rhs) noexcept { return lhs == rhs; }
};

/*
template <> struct KeyInfo<uint32_t>
{
static inline bool isValid(const uint32_t& key) noexcept { return key < 0xfffffffe; }
static inline uint32_t getTombstone() noexcept { return 0xfffffffe; }
static inline uint32_t getEmpty() noexcept { return 0xffffffff; }
static inline uint64_t hash(const uint32_t& key) noexcept { return key; }
static inline bool isEqual(const uint32_t& lhs, const uint32_t& rhs) noexcept { return lhs == rhs; }
};
template <> struct KeyInfo<std::string>
{
static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); }
static inline std::string getTombstone() noexcept
{
// and let's hope that small string optimization will do the job
return std::string(1, char(1));
}
static inline std::string getEmpty() noexcept { return std::string(); }
static inline uint64_t hash(const std::string& key) noexcept { return std::hash<std::string>{}(key); }
static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; }
};
*/

} // namespace Excalibur
#include <ExcaliburKeyInfo.h>

namespace Excalibur
{
Expand Down Expand Up @@ -416,11 +370,11 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
{
EXLBR_ASSERT(!TKeyInfo::isEqual(TKeyInfo::getTombstone(), key));
EXLBR_ASSERT(!TKeyInfo::isEqual(TKeyInfo::getEmpty(), key));
const uint32_t numBuckets = m_numBuckets;
const size_t numBuckets = m_numBuckets;
TItem* const firstItem = m_storage;
TItem* const endItem = firstItem + numBuckets;
const uint64_t hashValue = TKeyInfo::hash(key);
uint32_t bucketIndex = uint32_t(hashValue) & (numBuckets - 1);
const size_t hashValue = TKeyInfo::hash(key);
const size_t bucketIndex = hashValue & (numBuckets - 1);
TItem* startItem = firstItem + bucketIndex;
TItem* EXLBR_RESTRICT currentItem = startItem;
do
Expand Down Expand Up @@ -674,13 +628,13 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename

private:
template <typename TK, class... Args>
inline std::pair<IteratorKV, bool> emplaceToExisting(uint32_t numBuckets, TK&& key, Args&&... args)
inline std::pair<IteratorKV, bool> emplaceToExisting(size_t numBuckets, TK&& key, Args&&... args)
{
// numBuckets has to be power-of-two
EXLBR_ASSERT(numBuckets > 0);
EXLBR_ASSERT((numBuckets & (numBuckets - 1)) == 0);
const uint64_t hashValue = TKeyInfo::hash(key);
const uint32_t bucketIndex = uint32_t(hashValue) & (numBuckets - 1);
const size_t hashValue = TKeyInfo::hash(key);
const size_t bucketIndex = hashValue & (numBuckets - 1);
TItem* const firstItem = m_storage;
TItem* const endItem = firstItem + numBuckets;
TItem* EXLBR_RESTRICT currentItem = firstItem + bucketIndex;
Expand Down Expand Up @@ -715,7 +669,7 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
}
}

inline void reinsert(uint32_t numBucketsNew, TItem* EXLBR_RESTRICT item, TItem* const enditem) noexcept
inline void reinsert(size_t numBucketsNew, TItem* EXLBR_RESTRICT item, TItem* const enditem) noexcept
{
// re-insert existing elements
for (; item != enditem; item++)
Expand Down Expand Up @@ -769,9 +723,9 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
// i.e.
// auto it = table.find("key");
// table.emplace("another_key", it->second); // <--- when hash table grows it->second will point to a memory we are about to free
std::pair<IteratorKV, bool> it = emplaceToExisting(numBucketsNew, key, args...);
std::pair<IteratorKV, bool> it = emplaceToExisting(size_t(numBucketsNew), key, args...);

reinsert(numBucketsNew, item, enditem);
reinsert(size_t(numBucketsNew), item, enditem);

if (!isInlineStorage)
{
Expand Down
70 changes: 70 additions & 0 deletions ExcaliburHash/ExcaliburKeyInfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#pragma once

#include <string>

#include "wyhash.h"

namespace Excalibur
{

// Generic key-traits template (intentionally left without an implementation).
// A specialization for the key type T supplies the static functions listed below;
// the specializations later in this file (integers and std::string) are examples.
template <typename T> struct KeyInfo
{
    // Reserved sentinel key values. The hash table asserts that a key passed to it
    // is equal to neither of them.
    // static inline T getTombstone() noexcept;
    // static inline T getEmpty() noexcept;

    // Hash of the key; the table masks it with (numBuckets - 1) to pick a bucket.
    // static inline size_t hash(const T& key) noexcept;

    // Key equality.
    // static inline bool isEqual(const T& lhs, const T& rhs) noexcept;

    // In the specializations in this file this returns false for the sentinel values.
    // static inline bool isValid(const T& key) noexcept;
};

// Key traits for int32_t.
// The two largest values are reserved as sentinels: 0x7ffffffe ("empty") and
// 0x7fffffff ("tombstone"). Every smaller value is a valid key.
template <> struct KeyInfo<int32_t>
{
    static inline int32_t getEmpty() noexcept
    {
        return INT32_C(0x7ffffffe);
    }

    static inline int32_t getTombstone() noexcept
    {
        return INT32_C(0x7fffffff);
    }

    // A key is valid when it lies below both sentinel values.
    static inline bool isValid(const int32_t& key) noexcept
    {
        const bool belowSentinels = key < INT32_C(0x7ffffffe);
        return belowSentinels;
    }

    // Delegates to the wyhash integer hash.
    static inline size_t hash(const int32_t& key) noexcept
    {
        const size_t digest = Excalibur::wyhash::hash(key);
        return digest;
    }

    static inline bool isEqual(const int32_t& lhs, const int32_t& rhs) noexcept
    {
        return rhs == lhs;
    }
};

// Key traits for uint32_t.
// The two largest values are reserved as sentinels: 0xffffffff ("empty") and
// 0xfffffffe ("tombstone"). Every smaller value is a valid key.
template <> struct KeyInfo<uint32_t>
{
    static inline uint32_t getEmpty() noexcept
    {
        return UINT32_C(0xffffffff);
    }

    static inline uint32_t getTombstone() noexcept
    {
        return UINT32_C(0xfffffffe);
    }

    // A key is valid when it lies below both sentinel values.
    static inline bool isValid(const uint32_t& key) noexcept
    {
        const bool belowSentinels = key < UINT32_C(0xfffffffe);
        return belowSentinels;
    }

    // Delegates to the wyhash integer hash.
    static inline size_t hash(const uint32_t& key) noexcept
    {
        const size_t digest = Excalibur::wyhash::hash(key);
        return digest;
    }

    static inline bool isEqual(const uint32_t& lhs, const uint32_t& rhs) noexcept
    {
        return rhs == lhs;
    }
};

// Key traits for int64_t.
// The two largest values are reserved as sentinels: 0x7ffffffffffffffe ("empty") and
// 0x7fffffffffffffff ("tombstone"). Every smaller value is a valid key.
template <> struct KeyInfo<int64_t>
{
    static inline int64_t getEmpty() noexcept
    {
        return INT64_C(0x7ffffffffffffffe);
    }

    static inline int64_t getTombstone() noexcept
    {
        return INT64_C(0x7fffffffffffffff);
    }

    // A key is valid when it lies below both sentinel values.
    static inline bool isValid(const int64_t& key) noexcept
    {
        const bool belowSentinels = key < INT64_C(0x7ffffffffffffffe);
        return belowSentinels;
    }

    // Delegates to the wyhash integer hash.
    static inline size_t hash(const int64_t& key) noexcept
    {
        const size_t digest = Excalibur::wyhash::hash(key);
        return digest;
    }

    static inline bool isEqual(const int64_t& lhs, const int64_t& rhs) noexcept
    {
        return rhs == lhs;
    }
};

// Key traits for uint64_t.
// The two largest values are reserved as sentinels: 0xffffffffffffffff ("empty") and
// 0xfffffffffffffffe ("tombstone"). Every smaller value is a valid key.
template <> struct KeyInfo<uint64_t>
{
    static inline uint64_t getEmpty() noexcept
    {
        return UINT64_C(0xffffffffffffffff);
    }

    static inline uint64_t getTombstone() noexcept
    {
        return UINT64_C(0xfffffffffffffffe);
    }

    // A key is valid when it lies below both sentinel values.
    static inline bool isValid(const uint64_t& key) noexcept
    {
        const bool belowSentinels = key < UINT64_C(0xfffffffffffffffe);
        return belowSentinels;
    }

    // Delegates to the wyhash integer hash.
    static inline size_t hash(const uint64_t& key) noexcept
    {
        const size_t digest = Excalibur::wyhash::hash(key);
        return digest;
    }

    static inline bool isEqual(const uint64_t& lhs, const uint64_t& rhs) noexcept
    {
        return rhs == lhs;
    }
};

// Key traits for std::string.
// Sentinels: the empty string is the "empty" value and the one-character string
// consisting of char(1) is the "tombstone" value.
template <> struct KeyInfo<std::string>
{
    // Returns true for every key except the two sentinels.
    // Only the exact one-character tombstone is rejected: an ordinary key that merely
    // starts with char(1) (for example "\x01abc") is not equal to either sentinel and
    // therefore must be reported as valid.
    static inline bool isValid(const std::string& key) noexcept
    {
        if (key.empty())
        {
            // the "empty" sentinel
            return false;
        }
        const bool isTombstone = (key.size() == 1 && key[0] == char(1));
        return !isTombstone;
    }

    static inline std::string getTombstone() noexcept
    {
        // and let's hope that small string optimization will do the job
        // (this function is noexcept, so the construction below is expected not to allocate)
        return std::string(1, char(1));
    }

    static inline std::string getEmpty() noexcept { return std::string(); }

    // Uses the standard library string hash.
    static inline size_t hash(const std::string& key) noexcept { return std::hash<std::string>{}(key); }

    static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; }
};


} // namespace Excalibur
113 changes: 113 additions & 0 deletions ExcaliburHash/wyhash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#pragma once

#include <stdint.h>

// Compiler detection: each EXLBR_* flag is defined as (1) when the matching
// predefined compiler macro is present.
// NOTE(review): EXLBR_FORCE_INLINE is only defined in the _MSC_VER and __clang__
// branches; a compiler that defines only __GNUC__ gets no definition from this header.
// NOTE(review): if _MSC_VER and __clang__ are both defined (presumably clang-cl — confirm),
// EXLBR_FORCE_INLINE is defined twice with different replacement text.
#if defined(_MSC_VER)
#define EXLBR_VISUAL_STUDIO (1)
#define EXLBR_FORCE_INLINE __forceinline
#endif

#if defined(__clang__)
#define EXLBR_CLANG (1)
#define EXLBR_FORCE_INLINE __attribute__((always_inline))
#endif

#if defined(__GNUC__)
#define EXLBR_GCC (1)
#endif

// Target width: EXLBR_64 when any of the listed 64-bit platform macros is defined;
// every other target falls back to EXLBR_32.
#if defined(_M_X64) || defined(__aarch64__) || defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || \
defined(__LP64__) || defined(_WIN64)
#define EXLBR_64 (1)
#else
#define EXLBR_32 (1)
#endif

// MSVC intrinsics header; the hash functions in this file call _umul128 / __emulu
// in their EXLBR_VISUAL_STUDIO branches.
#if EXLBR_VISUAL_STUDIO
#include <intrin.h>
#endif

namespace Excalibur
{

namespace wyhash
{

#if EXLBR_64

// Mixes a 64-bit value: multiplies it by the constant 0x9E3779B97F4A7C15 into a
// full 128-bit product and returns the XOR of the product's low and high 64-bit halves.
//
// v       value to mix
// returns low64(v * C) ^ high64(v * C)
inline uint64_t _hash64(uint64_t v)
{
#if EXLBR_VISUAL_STUDIO
    {
        // _umul128 returns the low half of the product and stores the high half in `h`
        uint64_t h;
        uint64_t l = _umul128(v, UINT64_C(0x9E3779B97F4A7C15), &h);
        return l ^ h;
    }
#elif defined(__SIZEOF_INT128__)
    {
        // the compiler provides a native 128-bit integer type
        // (the original bare `#elif` had no condition, which is a preprocessor error here)
        __uint128_t r = v;
        r *= UINT64_C(0x9E3779B97F4A7C15);
        return static_cast<uint64_t>(r >> 64U) ^ static_cast<uint64_t>(r);
    }
#else
    {
#error Unsupported compiler or platform
    }
#endif
}

#elif EXLBR_32

// Mixes a 32-bit value: multiplies it by the constant 0x9e3779b1 into a full
// 64-bit product and returns the XOR of the product's low and high 32-bit halves.
//
// v       value to mix
// returns low32(v * C) ^ high32(v * C)
inline uint32_t _hash32(uint32_t v)
{
#if EXLBR_VISUAL_STUDIO
    {
        // __emulu performs the 32x32 -> 64 bit unsigned multiplication
        uint64_t lh = __emulu(v, UINT32_C(0x9e3779b1));
        return static_cast<uint32_t>(lh >> 32U) ^ static_cast<uint32_t>(lh);
    }
#else
    {
        // portable path: a plain 64-bit multiplication works on every compiler
        // (the original bare `#elif` had no condition, which is a preprocessor error here)
        const uint64_t lh = uint64_t(v) * uint64_t(0x9e3779b1);
        return static_cast<uint32_t>(lh >> 32U) ^ static_cast<uint32_t>(lh);
    }
#endif
}
#else
#error Unsupported compiler or platform
#endif

// Hashes a 64-bit unsigned value to size_t.
// EXLBR_64 builds pass the value straight to _hash64. EXLBR_32 builds first fold the
// upper and lower 32-bit halves together with XOR and hash the result with _hash32.
inline size_t hash(uint64_t v)
{
#if EXLBR_64
    const uint64_t mixed = _hash64(v);
    return mixed;
#elif EXLBR_32
    const uint32_t upperHalf = static_cast<uint32_t>(v >> 32U);
    const uint32_t lowerHalf = static_cast<uint32_t>(v);
    return _hash32(upperHalf ^ lowerHalf);
#else
#error Unsupported compiler or platform
#endif
}

// Hashes a 32-bit unsigned value to size_t.
// EXLBR_64 builds widen the value to 64 bits and use _hash64. EXLBR_32 builds use
// _hash32 directly.
inline size_t hash(uint32_t v)
{
#if EXLBR_64
    const uint64_t widened = static_cast<uint64_t>(v);
    return _hash64(widened);
#elif EXLBR_32
    const uint32_t mixed = _hash32(v);
    return mixed;
#else
#error Unsupported compiler or platform
#endif
}


// Signed 64-bit overload: converts the value to uint64_t and forwards to the unsigned overload.
inline size_t hash(int64_t v)
{
    const uint64_t asUnsigned = static_cast<uint64_t>(v);
    return hash(asUnsigned);
}
// Signed 32-bit overload: converts the value to uint32_t and forwards to the unsigned overload.
inline size_t hash(int32_t v)
{
    const uint32_t asUnsigned = static_cast<uint32_t>(v);
    return hash(asUnsigned);
}


} // namespace wyhash

} // namespace Excalibur
2 changes: 1 addition & 1 deletion ExcaliburHashTest01.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ template <> struct KeyInfo<Bar>
static inline bool isValid(const Bar& key) noexcept { return key.v < 0x7ffffffe; }
static inline Bar getTombstone() noexcept { return Bar{0x7fffffff}; }
static inline Bar getEmpty() noexcept { return Bar{0x7ffffffe}; }
static inline uint64_t hash(const Bar& key) noexcept { return std::hash<int>{}(key.v); }
static inline size_t hash(const Bar& key) noexcept { return std::hash<int>{}(key.v); }
static inline bool isEqual(const Bar& lhs, const Bar& rhs) noexcept { return lhs.v == rhs.v; }
};
} // namespace Excalibur
Expand Down
4 changes: 2 additions & 2 deletions ExcaliburHashTest02.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ template <> struct KeyInfo<ComplexStruct>
static inline bool isValid(const ComplexStruct& key) noexcept { return key.v < 0xfffffffe; }
static inline ComplexStruct getTombstone() noexcept { return ComplexStruct{0xfffffffe}; }
static inline ComplexStruct getEmpty() noexcept { return ComplexStruct{0xffffffff}; }
static inline uint64_t hash(const ComplexStruct& key) noexcept { return std::hash<uint32_t>{}(key.v); }
static inline size_t hash(const ComplexStruct& key) noexcept { return std::hash<uint32_t>{}(key.v); }
static inline bool isEqual(const ComplexStruct& lhs, const ComplexStruct& rhs) noexcept { return lhs.v == rhs.v; }
};

Expand Down Expand Up @@ -134,7 +134,7 @@ template <> struct KeyInfo<BadHashStruct>
static inline bool isValid(const BadHashStruct& key) noexcept { return key.v < 0x7ffffffe; }
static inline BadHashStruct getTombstone() noexcept { return BadHashStruct{0x7fffffff}; }
static inline BadHashStruct getEmpty() noexcept { return BadHashStruct{0x7ffffffe}; }
static inline uint64_t hash(const BadHashStruct& /*key*/) noexcept
static inline size_t hash(const BadHashStruct& /*key*/) noexcept
{
// Note: this is a very bad hash function causing 100% collisions
// added intentionally for the test
Expand Down
18 changes: 1 addition & 17 deletions ExcaliburHashTest03.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ template <> struct KeyInfo<CustomStruct>
static inline bool isValid(const CustomStruct& key) noexcept { return key.v < 0x7ffffffe; }
static inline CustomStruct getTombstone() noexcept { return CustomStruct{0x7fffffff}; }
static inline CustomStruct getEmpty() noexcept { return CustomStruct{0x7ffffffe}; }
static inline uint64_t hash(const CustomStruct& /*key*/) noexcept
static inline size_t hash(const CustomStruct& /*key*/) noexcept
{
// Note: this is a very bad hash function causing 100% collisions
// added intentionally for the test
Expand Down Expand Up @@ -92,22 +92,6 @@ TEST(SmFlatHashMap, EmplaceEdgeCase)
}
}

namespace Excalibur
{
template <> struct KeyInfo<std::string>
{
static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); }
static inline std::string getTombstone() noexcept
{
// and let's hope that small string optimization will do the job
return std::string(1, char(1));
}
static inline std::string getEmpty() noexcept { return std::string(); }
static inline uint64_t hash(const std::string& key) noexcept { return std::hash<std::string>{}(key); }
static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; }
};
} // namespace Excalibur

TEST(SmFlatHashMap, ComplexStruct)
{
{
Expand Down
16 changes: 0 additions & 16 deletions ExcaliburHashTest04.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,6 @@
#include <array>
#include <cstring>

namespace Excalibur
{
template <> struct KeyInfo<std::string>
{
static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); }
static inline std::string getTombstone() noexcept
{
// and let's hope that small string optimization will do the job
return std::string(1, char(1));
}
static inline std::string getEmpty() noexcept { return std::string(); }
static inline uint64_t hash(const std::string& key) noexcept { return std::hash<std::string>{}(key); }
static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; }
};
} // namespace Excalibur

struct ComplexValue
{
ComplexValue(const ComplexValue& other)
Expand Down

0 comments on commit a8f3bf2

Please sign in to comment.