From b0e561aec45e4b67602b34338ba1a6bc983a2898 Mon Sep 17 00:00:00 2001 From: SergeyMakeev Date: Sat, 3 Feb 2024 15:12:18 -0800 Subject: [PATCH 1/7] add configurable inplace storage --- CMakeLists.txt | 1 + ExcaliburHash/ExcaliburHash.h | 85 +++++++++++++++++++++++------------ ExcaliburHashTest02.cpp | 2 +- ExcaliburHashTest05.cpp | 64 ++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 30 deletions(-) create mode 100644 ExcaliburHashTest05.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b7e6d7..9f5d856 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ set(TEST_SOURCES ExcaliburHashTest02.cpp ExcaliburHashTest03.cpp ExcaliburHashTest04.cpp + ExcaliburHashTest05.cpp ) set (TEST_EXE_NAME ${PROJ_NAME}) diff --git a/ExcaliburHash/ExcaliburHash.h b/ExcaliburHash/ExcaliburHash.h index 1e26b45..05b5c08 100644 --- a/ExcaliburHash/ExcaliburHash.h +++ b/ExcaliburHash/ExcaliburHash.h @@ -112,7 +112,7 @@ TODO: Design descisions/principles TODO: Memory layout */ -template > class HashTable +template , unsigned kNumInlineItems = 1> class HashTable { struct has_values : std::bool_constant::type>::value> { @@ -241,7 +241,7 @@ template > cla if (!other.isUsingInlineStorage()) { // if not using inline storage than it's a simple pointer swap - allocateInline(TKeyInfo::getEmpty()); + constructInline(TKeyInfo::getEmpty()); m_storage = other.m_storage; m_numBuckets = other.m_numBuckets; m_numElements = other.m_numElements; @@ -252,23 +252,10 @@ template > cla else { // if using inline storage than let's move items from one inline storage into another - TItem* otherInlineItem = reinterpret_cast(&other.m_inlineStorage); - bool hasValidValue = otherInlineItem->isValid(); - TItem* inlineItem = allocateInline(std::move(*otherInlineItem->key())); + TItem* otherInlineItems = reinterpret_cast(&other.m_inlineStorage); + TItem* inlineItems = moveInline(otherInlineItems); - if constexpr (has_values::value) - { - // move inline storage value (if any) - if (hasValidValue) - { - TValue* value = inlineItem->value(); - TValue* otherValue = otherInlineItem->value(); - construct(value, std::move(*otherValue)); - destruct(otherValue); - } - } - - m_storage = inlineItem; + m_storage = inlineItems; m_numBuckets = other.m_numBuckets; m_numElements = other.m_numElements; // destruct(otherInlineItem); @@ -310,11 +297,50 @@ template > cla return (inlineStorage == m_storage); } - template inline TItem* allocateInline(Args&&... args) + template inline TItem* constructInline(Args&&... args) + { + TItem* inlineItems = reinterpret_cast(&m_inlineStorage); + for (unsigned i = 0; i < kNumInlineItems; i++) + { + construct((inlineItems + i), std::forward(args)...); + } + return inlineItems; + } + + inline TItem* moveInline(TItem* from) { - TItem* inlineItem = reinterpret_cast(&m_inlineStorage); - construct(inlineItem, std::forward(args)...); - return inlineItem; + TItem* inlineItems = reinterpret_cast(&m_inlineStorage); + + if constexpr (has_values::value) + { + // move all keys and valid values + for (unsigned i = 0; i < kNumInlineItems; i++) + { + TItem* inlineItem = (inlineItems + i); + TItem& otherInlineItem = from[i]; + const bool hasValidValue = otherInlineItem.isValid(); + construct((inlineItems + i), std::move(*otherInlineItem.key())); + + // move inline storage value (if any) + if (hasValidValue) + { + TValue* value = inlineItem->value(); + TValue* otherValue = otherInlineItem.value(); + construct(value, std::move(*otherValue)); + destruct(otherValue); + } + } + } + else + { + // move only keys + for (unsigned i = 0; i < kNumInlineItems; i++) + { + construct((inlineItems + i), std::move(*from[i].key())); + } + } + + return inlineItems; } inline uint32_t create(uint32_t numBuckets) @@ -476,7 +502,7 @@ template > cla protected: const HashTable* m_ht; TItem* m_item; - friend class HashTable; + friend class HashTable; }; class IteratorK : public IteratorBase @@ -592,10 +618,10 @@ template > cla HashTable() noexcept //: m_storage(nullptr) - : m_numBuckets(1) + : m_numBuckets(kNumInlineItems) , m_numElements(0) { - m_storage = allocateInline(TKeyInfo::getEmpty()); + m_storage = constructInline(TKeyInfo::getEmpty()); } ~HashTable() @@ -933,7 +959,7 @@ template > cla HashTable(const HashTable& other) { EXLBR_ASSERT(&other != this); - m_storage = allocateInline(TKeyInfo::getEmpty()); + m_storage = constructInline(TKeyInfo::getEmpty()); create(other.m_numBuckets); copyFrom(other); } @@ -946,7 +972,7 @@ template > cla return *this; } destroyAndFreeMemory(); - m_storage = allocateInline(TKeyInfo::getEmpty()); + m_storage = constructInline(TKeyInfo::getEmpty()); create(other.m_numBuckets); copyFrom(other); return *this; @@ -982,8 +1008,9 @@ template > cla // We need this inline storage to keep `m_storage` not null all the time. // This will save us from `empty()` check inside `find()` function implementation - typename std::aligned_storage::type m_inlineStorage; - static_assert(sizeof(m_inlineStorage) == sizeof(TItem), "Incorrect sizeof"); + static_assert(kNumInlineItems != 0, "Num inline items can't be zero!"); + typename std::aligned_storage::type m_inlineStorage; + static_assert(sizeof(m_inlineStorage) == (sizeof(TItem) * kNumInlineItems), "Incorrect sizeof"); }; } // namespace Excalibur diff --git a/ExcaliburHashTest02.cpp b/ExcaliburHashTest02.cpp index 1b89e6f..01fc143 100644 --- a/ExcaliburHashTest02.cpp +++ b/ExcaliburHashTest02.cpp @@ -60,7 +60,7 @@ TEST(SmFlatHashMap, CtorDtorCallCount) { // empty hash table - Excalibur::HashTable ht; + Excalibur::HashTable, 4> ht; EXPECT_TRUE(ht.empty()); EXPECT_EQ(ht.size(), 0u); EXPECT_GE(ht.capacity(), 0u); diff --git a/ExcaliburHashTest05.cpp b/ExcaliburHashTest05.cpp new file mode 100644 index 0000000..4996400 --- /dev/null +++ b/ExcaliburHashTest05.cpp @@ -0,0 +1,64 @@ +#include "ExcaliburHash.h" +#include "gtest/gtest.h" +#include +#include + +namespace Excalibur +{ +template <> struct KeyInfo +{ + static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); } + static inline std::string getTombstone() noexcept + { + // and let's hope that small string optimization will do the job + return std::string(1, char(1)); + } + static inline std::string getEmpty() noexcept { return std::string(); } + static inline uint64_t hash(const std::string& key) noexcept { return std::hash{}(key); } + static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; } +}; +} // namespace Excalibur + +TEST(SmFlatHashMap, InlineStorageTest01) +{ + // create hash map and insert one element + Excalibur::HashTable, 4> ht; + + EXPECT_GE(ht.capacity(), uint32_t(4)); + + auto it1 = ht.emplace(std::string("hello1"), std::string("world1")); + EXPECT_TRUE(it1.second); + auto it2 = ht.emplace(std::string("hello2"), std::string("world2")); + EXPECT_TRUE(it2.second); + + EXPECT_EQ(ht.size(), uint32_t(2)); + + { + auto _it1 = ht.find("hello1"); + ASSERT_NE(_it1, ht.end()); + const std::string& val1 = _it1->second; + ASSERT_EQ(val1, "world1"); + + auto _it2 = ht.find("hello2"); + ASSERT_NE(_it2, ht.end()); + const std::string& val2 = _it2->second; + ASSERT_EQ(val2, "world2"); + } + + for (int i = 0; i < 1000; i++) + { + ht.emplace(std::to_string(i), "tmp"); + } + + { + auto _it1 = ht.find("hello1"); + ASSERT_NE(_it1, ht.end()); + const std::string& val1 = _it1->second; + ASSERT_EQ(val1, "world1"); + + auto _it2 = ht.find("hello2"); + ASSERT_NE(_it2, ht.end()); + const std::string& val2 = _it2->second; + ASSERT_EQ(val2, "world2"); + } +} From 45337f6b2528635edcb008310c35399a43318952 Mon Sep 17 00:00:00 2001 From: SergeyMakeev Date: Sat, 3 Feb 2024 15:41:08 -0800 Subject: [PATCH 2/7] changed template arguments order to make it less verbose --- ExcaliburHash/ExcaliburHash.h | 4 ++-- ExcaliburHashTest02.cpp | 2 +- ExcaliburHashTest05.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ExcaliburHash/ExcaliburHash.h b/ExcaliburHash/ExcaliburHash.h index 05b5c08..084b0db 100644 --- a/ExcaliburHash/ExcaliburHash.h +++ b/ExcaliburHash/ExcaliburHash.h @@ -112,7 +112,7 @@ TODO: Design descisions/principles TODO: Memory layout */ -template , unsigned kNumInlineItems = 1> class HashTable +template > class HashTable { struct has_values : std::bool_constant::type>::value> { @@ -502,7 +502,7 @@ template , uns protected: const HashTable* m_ht; TItem* m_item; - friend class HashTable; + friend class HashTable; }; class IteratorK : public IteratorBase diff --git a/ExcaliburHashTest02.cpp b/ExcaliburHashTest02.cpp index 01fc143..1123811 100644 --- a/ExcaliburHashTest02.cpp +++ b/ExcaliburHashTest02.cpp @@ -60,7 +60,7 @@ TEST(SmFlatHashMap, CtorDtorCallCount) { // empty hash table - Excalibur::HashTable, 4> ht; + Excalibur::HashTable ht; EXPECT_TRUE(ht.empty()); EXPECT_EQ(ht.size(), 0u); EXPECT_GE(ht.capacity(), 0u); diff --git a/ExcaliburHashTest05.cpp b/ExcaliburHashTest05.cpp index 4996400..116ea53 100644 --- a/ExcaliburHashTest05.cpp +++ b/ExcaliburHashTest05.cpp @@ -22,7 +22,7 @@ template <> struct KeyInfo TEST(SmFlatHashMap, InlineStorageTest01) { // create hash map and insert one element - Excalibur::HashTable, 4> ht; + Excalibur::HashTable ht; EXPECT_GE(ht.capacity(), uint32_t(4)); From d05e10b1c0b99c9839888f0705cf87cab8f3db6e Mon Sep 17 00:00:00 2001 From: SergeyMakeev Date: Sat, 3 Feb 2024 15:45:02 -0800 Subject: [PATCH 3/7] add pow2 static assert --- ExcaliburHash/ExcaliburHash.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ExcaliburHash/ExcaliburHash.h b/ExcaliburHash/ExcaliburHash.h index 084b0db..9ae77f7 100644 --- a/ExcaliburHash/ExcaliburHash.h +++ b/ExcaliburHash/ExcaliburHash.h @@ -320,7 +320,7 @@ template ((inlineItems + i), std::move(*otherInlineItem.key())); - + // move inline storage value (if any) if (hasValidValue) { @@ -333,7 +333,7 @@ template ((inlineItems + i), std::move(*from[i].key())); @@ -1006,9 +1006,16 @@ template inline static constexpr bool isPow2(INTEGRAL_TYPE x) noexcept + { + static_assert(std::is_integral::value, "isPow2 must be called on an integer type."); + return (x & (x - 1)) == 0 && (x != 0); + } + // We need this inline storage to keep `m_storage` not null all the time. // This will save us from `empty()` check inside `find()` function implementation static_assert(kNumInlineItems != 0, "Num inline items can't be zero!"); + static_assert(isPow2(kNumInlineItems), "Num inline items should be power of two"); typename std::aligned_storage::type m_inlineStorage; static_assert(sizeof(m_inlineStorage) == (sizeof(TItem) * kNumInlineItems), "Incorrect sizeof"); }; From ae34c6f386e4879292ab86b031ceff6979349431 Mon Sep 17 00:00:00 2001 From: SergeyMakeev Date: Sat, 3 Feb 2024 19:07:36 -0800 Subject: [PATCH 4/7] cleanup --- ExcaliburHash/ExcaliburHash.h | 43 ++++------------------------------- 1 file changed, 5 insertions(+), 38 deletions(-) diff --git a/ExcaliburHash/ExcaliburHash.h b/ExcaliburHash/ExcaliburHash.h index 9ae77f7..0fdc6b2 100644 --- a/ExcaliburHash/ExcaliburHash.h +++ b/ExcaliburHash/ExcaliburHash.h @@ -258,7 +258,6 @@ template value(); TValue* otherValue = otherInlineItem.value(); construct(value, std::move(*otherValue)); - destruct(otherValue); + + if constexpr (!std::is_trivially_destructible::value) + { + destruct(otherValue); + } } } } @@ -829,42 +832,6 @@ template ::end(*this)) - { - return false; - } - - EXLBR_ASSERT(m_numElements != 0); - m_numElements--; - - if constexpr ((!std::is_trivially_destructible::value) && (has_values::value)) - { - TValue* itemValue = const_cast(it.getValue()); - destruct(itemValue); - } - - // hash table now is empty. convert all tombstones to empty keys - if (m_numElements == 0) - { - TItem* EXLBR_RESTRICT item = m_storage; - TItem* const endItem = item + m_numBuckets; - for (; item != endItem; item++) - { - *item->key() = TKeyInfo::getEmpty(); - } - return true; - } - - // overwrite key with empty key - TKey* itemKey = const_cast(it.getKey()); - *itemKey = TKeyInfo::getTombstone(); - return true; - } - */ - inline bool erase(const TKey& key) { auto it = find(key); From 36c99a9f61588b3ed8d1b0526da84efe239ed341 Mon Sep 17 00:00:00 2001 From: SergeyMakeev Date: Sat, 3 Feb 2024 19:34:35 -0800 Subject: [PATCH 5/7] fix error with pair returning the wrong value when hashtable grows + add aliases + operator[] optimization --- ExcaliburHash/ExcaliburHash.h | 25 ++++++++++++------- ExcaliburHashTest05.cpp | 46 +++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/ExcaliburHash/ExcaliburHash.h b/ExcaliburHash/ExcaliburHash.h index 0fdc6b2..3f5cdfb 100644 --- a/ExcaliburHash/ExcaliburHash.h +++ b/ExcaliburHash/ExcaliburHash.h @@ -733,6 +733,15 @@ template second); // <--- when hash table grows it->second will point to a memory we are about to free - auto it = emplaceToExisting(numBucketsNew, key, args...); + std::pair it = emplaceToExisting(numBucketsNew, key, args...); reinsert(numBucketsNew, item, enditem); @@ -873,14 +882,6 @@ template emplaceIt = emplace(key); return emplaceIt.first.value(); } @@ -987,4 +988,10 @@ template using HashMap = HashTable>; + +// hashset declaration +template using HashSet = HashTable>; + } // namespace Excalibur diff --git a/ExcaliburHashTest05.cpp b/ExcaliburHashTest05.cpp index 116ea53..3095ef1 100644 --- a/ExcaliburHashTest05.cpp +++ b/ExcaliburHashTest05.cpp @@ -61,4 +61,50 @@ TEST(SmFlatHashMap, InlineStorageTest01) const std::string& val2 = _it2->second; ASSERT_EQ(val2, "world2"); } +} + + +TEST(SmFlatHashMap, AliasNameTest) +{ + { + Excalibur::HashMap hm; + auto it1 = hm.emplace(1, 2); + EXPECT_TRUE(it1.second); + auto it2 = hm.emplace(2, 3); + EXPECT_TRUE(it2.second); + + auto _it1 = hm.find(1); + ASSERT_NE(_it1, hm.end()); + + auto _it2 = hm.find(2); + ASSERT_NE(_it2, hm.end()); + + auto _it3 = hm.find(3); + ASSERT_EQ(_it3, hm.end()); + + const int& val1 = _it1->second; + const int& val2 = _it2->second; + ASSERT_EQ(val1, 2); + ASSERT_EQ(val2, 3); + } + + { + Excalibur::HashSet hs; + auto it1 = hs.emplace(1); + EXPECT_TRUE(it1.second); + auto it2 = hs.emplace(1); + EXPECT_FALSE(it2.second); + auto it3 = hs.emplace(2); + EXPECT_TRUE(it3.second); + + EXPECT_TRUE(hs.has(1)); + EXPECT_TRUE(hs.has(2)); + EXPECT_FALSE(hs.has(3)); + } + + + + + + } From 26fa430ab4e5d997374a9029adf2e9e8b3e444d7 Mon Sep 17 00:00:00 2001 From: SergeyMakeev Date: Sat, 3 Feb 2024 19:59:27 -0800 Subject: [PATCH 6/7] fix clang/gcc --- ExcaliburHash/ExcaliburHash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ExcaliburHash/ExcaliburHash.h b/ExcaliburHash/ExcaliburHash.h index 3f5cdfb..79f9d68 100644 --- a/ExcaliburHash/ExcaliburHash.h +++ b/ExcaliburHash/ExcaliburHash.h @@ -992,6 +992,6 @@ template using HashMap = HashTable>; // hashset declaration -template using HashSet = HashTable>; +template using HashSet = HashTable>; } // namespace Excalibur From 7691028c0e83304097b08065d35ba4075d9a9eaf Mon Sep 17 00:00:00 2001 From: Sergey Makeev Date: Sat, 3 Feb 2024 21:37:52 -0800 Subject: [PATCH 7/7] Update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 7656eae..4a43378 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,14 @@ It uses an open addressing hash table and manages removed items with a method ca Engineered for ease of use, Excalibur Hash, in a vast majority of cases (99%), serves as a seamless, drop-in alternative to std::unordered_map. However, it's important to note that Excalibur Hash does not guarantee stable addressing. So, if your project needs to hold direct pointers to the keys or values, Excalibur Hash might not work as you expect. This aside, its design and efficiency make it a great choice for applications where speed is crucial. +## Features + +1. Extremely fast (see Performance section for details) +2. CPU cache friendly +3. Built-in configurable inline storage +4. Can either work as a map (key, value) or as a set (keys only) + + ## Performance In this section, you can see a performance comparison against a few popular hash table implementations.