Skip to content

Commit

Permalink
Merge pull request #10 from SergeyMakeev/configurable_inplace_storage
Browse files Browse the repository at this point in the history
- Add configurable inplace storage
- Fix the incorrect second element (the `bool`) of the `pair<it, bool>` returned by `emplace()` when the hash table grows
- Performance optimizations
- Add more tests
  • Loading branch information
SergeyMakeev committed Feb 4, 2024
2 parents 2cd900f + 7691028 commit 7d001e9
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 76 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(TEST_SOURCES
ExcaliburHashTest02.cpp
ExcaliburHashTest03.cpp
ExcaliburHashTest04.cpp
ExcaliburHashTest05.cpp
)

set (TEST_EXE_NAME ${PROJ_NAME})
Expand Down
158 changes: 83 additions & 75 deletions ExcaliburHash/ExcaliburHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ TODO: Design descisions/principles
TODO: Memory layout
*/
template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> class HashTable
template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename TKeyInfo = KeyInfo<TKey>> class HashTable
{
struct has_values : std::bool_constant<!std::is_same<std::nullptr_t, typename std::remove_reference<TValue>::type>::value>
{
Expand Down Expand Up @@ -241,7 +241,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
if (!other.isUsingInlineStorage())
{
// if not using inline storage then it's a simple pointer swap
allocateInline(TKeyInfo::getEmpty());
constructInline(TKeyInfo::getEmpty());
m_storage = other.m_storage;
m_numBuckets = other.m_numBuckets;
m_numElements = other.m_numElements;
Expand All @@ -252,26 +252,12 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
else
{
// if using inline storage then let's move the items from one inline storage into the other
TItem* otherInlineItem = reinterpret_cast<TItem*>(&other.m_inlineStorage);
bool hasValidValue = otherInlineItem->isValid();
TItem* inlineItem = allocateInline(std::move(*otherInlineItem->key()));
TItem* otherInlineItems = reinterpret_cast<TItem*>(&other.m_inlineStorage);
TItem* inlineItems = moveInline(otherInlineItems);

if constexpr (has_values::value)
{
// move inline storage value (if any)
if (hasValidValue)
{
TValue* value = inlineItem->value();
TValue* otherValue = otherInlineItem->value();
construct<TValue>(value, std::move(*otherValue));
destruct(otherValue);
}
}

m_storage = inlineItem;
m_storage = inlineItems;
m_numBuckets = other.m_numBuckets;
m_numElements = other.m_numElements;
// destruct(otherInlineItem);
other.m_storage = nullptr;
// other.m_numBuckets = 0;
// other.m_numElements = 0;
Expand Down Expand Up @@ -310,11 +296,54 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
return (inlineStorage == m_storage);
}

template <class... Args> inline TItem* allocateInline(Args&&... args)
template <class... Args> inline TItem* constructInline(Args&&... args)
{
TItem* inlineItem = reinterpret_cast<TItem*>(&m_inlineStorage);
construct<TItem>(inlineItem, std::forward<Args>(args)...);
return inlineItem;
TItem* inlineItems = reinterpret_cast<TItem*>(&m_inlineStorage);
for (unsigned i = 0; i < kNumInlineItems; i++)
{
construct<TItem>((inlineItems + i), std::forward<Args>(args)...);
}
return inlineItems;
}

// Move-constructs every inline bucket of this table from the `from` array.
// `from` is indexed for kNumInlineItems entries; the visible caller passes the inline storage of another table.
// For tables that store values, the value of each valid source item is moved as well and the
// source value is destroyed afterwards (when TValue is not trivially destructible).
// Returns a pointer to the first item of this table's inline storage.
inline TItem* moveInline(TItem* from)
{
    TItem* dstItems = reinterpret_cast<TItem*>(&m_inlineStorage);

    for (unsigned idx = 0; idx < kNumInlineItems; idx++)
    {
        TItem& src = from[idx];
        TItem* dst = dstItems + idx;

        if constexpr (has_values::value)
        {
            // validity is sampled before the key is moved out of the source item
            // (presumably a moved-from key no longer reports the original state)
            const bool srcHoldsValue = src.isValid();
            construct<TItem>(dst, std::move(*src.key()));

            if (srcHoldsValue)
            {
                // move the value stored alongside the key
                TValue* dstValue = dst->value();
                TValue* srcValue = src.value();
                construct<TValue>(dstValue, std::move(*srcValue));

                if constexpr (!std::is_trivially_destructible<TValue>::value)
                {
                    destruct(srcValue);
                }
            }
        }
        else
        {
            // keys-only table: there is no value to move
            construct<TItem>(dst, std::move(*src.key()));
        }
    }

    return dstItems;
}

inline uint32_t create(uint32_t numBuckets)
Expand Down Expand Up @@ -476,7 +505,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
protected:
const HashTable* m_ht;
TItem* m_item;
friend class HashTable<TKey, TValue, TKeyInfo>;
friend class HashTable<TKey, TValue, kNumInlineItems, TKeyInfo>;
};

class IteratorK : public IteratorBase
Expand Down Expand Up @@ -592,10 +621,10 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla

HashTable() noexcept
//: m_storage(nullptr)
: m_numBuckets(1)
: m_numBuckets(kNumInlineItems)
, m_numElements(0)
{
m_storage = allocateInline(TKeyInfo::getEmpty());
m_storage = constructInline(TKeyInfo::getEmpty());
}

~HashTable()
Expand Down Expand Up @@ -704,6 +733,15 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
TItem* storage = m_storage;
TItem* EXLBR_RESTRICT item = storage;
TItem* const enditem = item + numBuckets;

// check whether such an element already exists
// in that case we don't need to do anything
TItem* existingItem = findImpl(key);
if (existingItem != enditem)
{
return std::make_pair(IteratorKV(this, existingItem), false);
}

bool isInlineStorage = isUsingInlineStorage();

numBucketsNew = create(numBucketsNew);
Expand All @@ -714,7 +752,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
// i.e.
// auto it = table.find("key");
// table.emplace("another_key", it->second); // <--- when hash table grows it->second will point to a memory we are about to free
auto it = emplaceToExisting(numBucketsNew, key, args...);
std::pair<IteratorKV, bool> it = emplaceToExisting(numBucketsNew, key, args...);

reinsert(numBucketsNew, item, enditem);

Expand Down Expand Up @@ -803,42 +841,6 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
return ConstIteratorKV(this, item);
}

/*
inline bool erase(const IteratorBase it)
{
if (it == IteratorHelper<IteratorBase>::end(*this))
{
return false;
}
EXLBR_ASSERT(m_numElements != 0);
m_numElements--;
if constexpr ((!std::is_trivially_destructible<TValue>::value) && (has_values::value))
{
TValue* itemValue = const_cast<TValue*>(it.getValue());
destruct(itemValue);
}
// hash table now is empty. convert all tombstones to empty keys
if (m_numElements == 0)
{
TItem* EXLBR_RESTRICT item = m_storage;
TItem* const endItem = item + m_numBuckets;
for (; item != endItem; item++)
{
*item->key() = TKeyInfo::getEmpty();
}
return true;
}
// overwrite key with empty key
TKey* itemKey = const_cast<TKey*>(it.getKey());
*itemKey = TKeyInfo::getTombstone();
return true;
}
*/

inline bool erase(const TKey& key)
{
auto it = find(key);
Expand Down Expand Up @@ -880,14 +882,6 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla

inline TValue& operator[](const TKey& key)
{
IteratorKV it = find(key);
if (it != iend())
{
return it.value();
}
// note: we can not use `emplace()` without calling `find()` function first
// because calling `emplace()` function might grow the hash table even if a key exists in the table (which will invalidate existing
// iterators)
std::pair<IteratorKV, bool> emplaceIt = emplace(key);
return emplaceIt.first.value();
}
Expand Down Expand Up @@ -933,7 +927,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
HashTable(const HashTable& other)
{
EXLBR_ASSERT(&other != this);
m_storage = allocateInline(TKeyInfo::getEmpty());
m_storage = constructInline(TKeyInfo::getEmpty());
create(other.m_numBuckets);
copyFrom(other);
}
Expand All @@ -946,7 +940,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
return *this;
}
destroyAndFreeMemory();
m_storage = allocateInline(TKeyInfo::getEmpty());
m_storage = constructInline(TKeyInfo::getEmpty());
create(other.m_numBuckets);
copyFrom(other);
return *this;
Expand Down Expand Up @@ -980,10 +974,24 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
uint32_t m_numBuckets; // 4
uint32_t m_numElements; // 4

// Returns true when `x` is a positive power of two (1, 2, 4, 8, ...).
// Zero and negative values are never reported as powers of two.
// The `x > 0` test runs first so that `x - 1` is only evaluated for x >= 1; this keeps the
// expression free of signed overflow for the minimum value of a signed type.
template <typename INTEGRAL_TYPE> inline static constexpr bool isPow2(INTEGRAL_TYPE x) noexcept
{
    static_assert(std::is_integral<INTEGRAL_TYPE>::value, "isPow2 must be called on an integer type.");
    return (x > 0) && ((x & (x - 1)) == 0);
}

// We need this inline storage to keep `m_storage` not null all the time.
// This will save us from `empty()` check inside `find()` function implementation
typename std::aligned_storage<sizeof(TItem), alignof(TItem)>::type m_inlineStorage;
static_assert(sizeof(m_inlineStorage) == sizeof(TItem), "Incorrect sizeof");
static_assert(kNumInlineItems != 0, "Num inline items can't be zero!");
static_assert(isPow2(kNumInlineItems), "Num inline items should be power of two");
typename std::aligned_storage<sizeof(TItem) * kNumInlineItems, alignof(TItem)>::type m_inlineStorage;
static_assert(sizeof(m_inlineStorage) == (sizeof(TItem) * kNumInlineItems), "Incorrect sizeof");
};

// HashMap: convenience alias for a key -> value hash table.
// Instantiates HashTable with one inline item (kNumInlineItems = 1) and KeyInfo<TKey> as the key traits.
template <typename TKey, typename TValue> using HashMap = HashTable<TKey, TValue, 1, KeyInfo<TKey>>;

// HashSet: convenience alias for a keys-only hash table.
// The value type is std::nullptr_t, which is the type `has_values` treats as "no values stored".
// Instantiates HashTable with one inline item (kNumInlineItems = 1) and KeyInfo<TKey> as the key traits.
template <typename TKey> using HashSet = HashTable<TKey, std::nullptr_t, 1, KeyInfo<TKey>>;

} // namespace Excalibur
2 changes: 1 addition & 1 deletion ExcaliburHashTest02.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ TEST(SmFlatHashMap, CtorDtorCallCount)

{
// empty hash table
Excalibur::HashTable<ComplexStruct, int> ht;
Excalibur::HashTable<ComplexStruct, int, 4> ht;
EXPECT_TRUE(ht.empty());
EXPECT_EQ(ht.size(), 0u);
EXPECT_GE(ht.capacity(), 0u);
Expand Down
110 changes: 110 additions & 0 deletions ExcaliburHashTest05.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#include "ExcaliburHash.h"
#include "gtest/gtest.h"
#include <array>
#include <cstring>

namespace Excalibur
{
// Key traits that allow std::string to be used as a hash table key.
// Reserved keys: the empty string marks an empty slot and a one-character string
// holding char(1) marks a tombstone.
template <> struct KeyInfo<std::string>
{
    // A key is valid when it is non-empty and does not start with the char(1) marker.
    static inline bool isValid(const std::string& key) noexcept
    {
        if (key.empty())
        {
            return false;
        }
        return key.front() != char(1);
    }

    // Tombstone key: a single char(1) character.
    static inline std::string getTombstone() noexcept
    {
        // short enough that the small string optimization is hoped to avoid an allocation
        const std::string::size_type markerLength = 1;
        return std::string(markerLength, char(1));
    }

    // Empty-slot key: the empty string.
    static inline std::string getEmpty() noexcept { return std::string{}; }

    // Hashes the key with std::hash<std::string>.
    static inline uint64_t hash(const std::string& key) noexcept
    {
        const std::hash<std::string> hasher{};
        return hasher(key);
    }

    // Two keys are equal when their contents compare equal.
    static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs.compare(rhs) == 0; }
};
} // namespace Excalibur

TEST(SmFlatHashMap, InlineStorageTest01)
{
    // hash map configured with 8 inline items; two entries are inserted up front
    Excalibur::HashTable<std::string, std::string, 8> table;

    EXPECT_GE(table.capacity(), uint32_t(4));

    auto firstInsert = table.emplace(std::string("hello1"), std::string("world1"));
    EXPECT_TRUE(firstInsert.second);
    auto secondInsert = table.emplace(std::string("hello2"), std::string("world2"));
    EXPECT_TRUE(secondInsert.second);

    EXPECT_EQ(table.size(), uint32_t(2));

    // both entries must be found, with their values, right after insertion
    {
        auto foundFirst = table.find("hello1");
        ASSERT_NE(foundFirst, table.end());
        const std::string& firstValue = foundFirst->second;
        ASSERT_EQ(firstValue, "world1");

        auto foundSecond = table.find("hello2");
        ASSERT_NE(foundSecond, table.end());
        const std::string& secondValue = foundSecond->second;
        ASSERT_EQ(secondValue, "world2");
    }

    // add 1000 more keys (presumably enough to make the table outgrow its inline storage)
    for (int n = 0; n < 1000; ++n)
    {
        table.emplace(std::to_string(n), "tmp");
    }

    // the two original entries must still be found, with unchanged values
    {
        auto foundFirst = table.find("hello1");
        ASSERT_NE(foundFirst, table.end());
        const std::string& firstValue = foundFirst->second;
        ASSERT_EQ(firstValue, "world1");

        auto foundSecond = table.find("hello2");
        ASSERT_NE(foundSecond, table.end());
        const std::string& secondValue = foundSecond->second;
        ASSERT_EQ(secondValue, "world2");
    }
}


TEST(SmFlatHashMap, AliasNameTest)
{
    // HashMap alias: insert two pairs, then look up two present keys and one absent key
    {
        Excalibur::HashMap<int, int> hashMap;
        auto insertedOne = hashMap.emplace(1, 2);
        EXPECT_TRUE(insertedOne.second);
        auto insertedTwo = hashMap.emplace(2, 3);
        EXPECT_TRUE(insertedTwo.second);

        auto foundOne = hashMap.find(1);
        ASSERT_NE(foundOne, hashMap.end());

        auto foundTwo = hashMap.find(2);
        ASSERT_NE(foundTwo, hashMap.end());

        auto foundThree = hashMap.find(3);
        ASSERT_EQ(foundThree, hashMap.end());

        const int& valueOne = foundOne->second;
        const int& valueTwo = foundTwo->second;
        ASSERT_EQ(valueOne, 2);
        ASSERT_EQ(valueTwo, 3);
    }

    // HashSet alias: a repeated key must be reported as "not inserted"
    {
        Excalibur::HashSet<int> hashSet;
        auto firstOne = hashSet.emplace(1);
        EXPECT_TRUE(firstOne.second);
        auto repeatedOne = hashSet.emplace(1);
        EXPECT_FALSE(repeatedOne.second);
        auto firstTwo = hashSet.emplace(2);
        EXPECT_TRUE(firstTwo.second);

        EXPECT_TRUE(hashSet.has(1));
        EXPECT_TRUE(hashSet.has(2));
        EXPECT_FALSE(hashSet.has(3));
    }
}
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ It uses an open addressing hash table and manages removed items with a method ca
Engineered for ease of use, Excalibur Hash, in a vast majority of cases (99%), serves as a seamless, drop-in alternative to std::unordered_map. However, it's important to note that Excalibur Hash does not guarantee stable addressing.
So, if your project needs to hold direct pointers to the keys or values, Excalibur Hash might not work as you expect. This aside, its design and efficiency make it a great choice for applications where speed is crucial.

## Features

1. Extremely fast (see Performance section for details)
2. CPU cache friendly
3. Built-in configurable inline storage
4. Can either work as a map (key, value) or as a set (keys only)


## Performance

In this section, you can see a performance comparison against a few popular hash table implementations.
Expand Down

0 comments on commit 7d001e9

Please sign in to comment.