Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configurable inplace storage #10

Merged
merged 7 commits into from
Feb 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(TEST_SOURCES
ExcaliburHashTest02.cpp
ExcaliburHashTest03.cpp
ExcaliburHashTest04.cpp
ExcaliburHashTest05.cpp
)

set (TEST_EXE_NAME ${PROJ_NAME})
Expand Down
158 changes: 83 additions & 75 deletions ExcaliburHash/ExcaliburHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ TODO: Design descisions/principles
TODO: Memory layout

*/
template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> class HashTable
template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename TKeyInfo = KeyInfo<TKey>> class HashTable
{
struct has_values : std::bool_constant<!std::is_same<std::nullptr_t, typename std::remove_reference<TValue>::type>::value>
{
Expand Down Expand Up @@ -241,7 +241,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
if (!other.isUsingInlineStorage())
{
// if not using inline storage then it's a simple pointer swap
allocateInline(TKeyInfo::getEmpty());
constructInline(TKeyInfo::getEmpty());
m_storage = other.m_storage;
m_numBuckets = other.m_numBuckets;
m_numElements = other.m_numElements;
Expand All @@ -252,26 +252,12 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
else
{
// if using inline storage then let's move items from one inline storage into another
TItem* otherInlineItem = reinterpret_cast<TItem*>(&other.m_inlineStorage);
bool hasValidValue = otherInlineItem->isValid();
TItem* inlineItem = allocateInline(std::move(*otherInlineItem->key()));
TItem* otherInlineItems = reinterpret_cast<TItem*>(&other.m_inlineStorage);
TItem* inlineItems = moveInline(otherInlineItems);

if constexpr (has_values::value)
{
// move inline storage value (if any)
if (hasValidValue)
{
TValue* value = inlineItem->value();
TValue* otherValue = otherInlineItem->value();
construct<TValue>(value, std::move(*otherValue));
destruct(otherValue);
}
}

m_storage = inlineItem;
m_storage = inlineItems;
m_numBuckets = other.m_numBuckets;
m_numElements = other.m_numElements;
// destruct(otherInlineItem);
other.m_storage = nullptr;
// other.m_numBuckets = 0;
// other.m_numElements = 0;
Expand Down Expand Up @@ -310,11 +296,54 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
return (inlineStorage == m_storage);
}

template <class... Args> inline TItem* allocateInline(Args&&... args)
template <class... Args> inline TItem* constructInline(Args&&... args)
{
TItem* inlineItem = reinterpret_cast<TItem*>(&m_inlineStorage);
construct<TItem>(inlineItem, std::forward<Args>(args)...);
return inlineItem;
TItem* inlineItems = reinterpret_cast<TItem*>(&m_inlineStorage);
for (unsigned i = 0; i < kNumInlineItems; i++)
{
construct<TItem>((inlineItems + i), std::forward<Args>(args)...);
}
return inlineItems;
}

// Move-constructs this table's inline slots from the `kNumInlineItems` items starting at `from`.
// Every key is moved into the matching local slot. When the table stores values, the value of each
// valid source item is move-constructed locally as well, and the source value is destroyed afterwards
// (skipped for trivially destructible value types).
// Returns a pointer to the first local inline slot.
inline TItem* moveInline(TItem* from)
{
    TItem* const localItems = reinterpret_cast<TItem*>(&m_inlineStorage);

    for (unsigned slot = 0; slot < kNumInlineItems; slot++)
    {
        TItem& source = from[slot];
        TItem* const target = localItems + slot;

        if constexpr (has_values::value)
        {
            // validity is sampled before the key is moved out of the source item
            // (presumably because isValid() inspects the key - confirm against TItem)
            const bool sourceHasValue = source.isValid();
            construct<TItem>(target, std::move(*source.key()));

            if (!sourceHasValue)
            {
                continue;
            }

            // the source slot holds a live value: move it across and retire the original
            TValue* targetValue = target->value();
            TValue* sourceValue = source.value();
            construct<TValue>(targetValue, std::move(*sourceValue));

            if constexpr (!std::is_trivially_destructible<TValue>::value)
            {
                destruct(sourceValue);
            }
        }
        else
        {
            // keys-only table: the key is the only thing to transfer
            construct<TItem>(target, std::move(*source.key()));
        }
    }

    return localItems;
}

inline uint32_t create(uint32_t numBuckets)
Expand Down Expand Up @@ -476,7 +505,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
protected:
const HashTable* m_ht;
TItem* m_item;
friend class HashTable<TKey, TValue, TKeyInfo>;
friend class HashTable<TKey, TValue, kNumInlineItems, TKeyInfo>;
};

class IteratorK : public IteratorBase
Expand Down Expand Up @@ -592,10 +621,10 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla

HashTable() noexcept
//: m_storage(nullptr)
: m_numBuckets(1)
: m_numBuckets(kNumInlineItems)
, m_numElements(0)
{
m_storage = allocateInline(TKeyInfo::getEmpty());
m_storage = constructInline(TKeyInfo::getEmpty());
}

~HashTable()
Expand Down Expand Up @@ -704,6 +733,15 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
TItem* storage = m_storage;
TItem* EXLBR_RESTRICT item = storage;
TItem* const enditem = item + numBuckets;

// check whether such an element already exists;
// in that case we don't need to do anything
TItem* existingItem = findImpl(key);
if (existingItem != enditem)
{
return std::make_pair(IteratorKV(this, existingItem), false);
}

bool isInlineStorage = isUsingInlineStorage();

numBucketsNew = create(numBucketsNew);
Expand All @@ -714,7 +752,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
// i.e.
// auto it = table.find("key");
// table.emplace("another_key", it->second); // <--- when hash table grows it->second will point to a memory we are about to free
auto it = emplaceToExisting(numBucketsNew, key, args...);
std::pair<IteratorKV, bool> it = emplaceToExisting(numBucketsNew, key, args...);

reinsert(numBucketsNew, item, enditem);

Expand Down Expand Up @@ -803,42 +841,6 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
return ConstIteratorKV(this, item);
}

/*
inline bool erase(const IteratorBase it)
{
if (it == IteratorHelper<IteratorBase>::end(*this))
{
return false;
}

EXLBR_ASSERT(m_numElements != 0);
m_numElements--;

if constexpr ((!std::is_trivially_destructible<TValue>::value) && (has_values::value))
{
TValue* itemValue = const_cast<TValue*>(it.getValue());
destruct(itemValue);
}

// hash table now is empty. convert all tombstones to empty keys
if (m_numElements == 0)
{
TItem* EXLBR_RESTRICT item = m_storage;
TItem* const endItem = item + m_numBuckets;
for (; item != endItem; item++)
{
*item->key() = TKeyInfo::getEmpty();
}
return true;
}

// overwrite key with empty key
TKey* itemKey = const_cast<TKey*>(it.getKey());
*itemKey = TKeyInfo::getTombstone();
return true;
}
*/

inline bool erase(const TKey& key)
{
auto it = find(key);
Expand Down Expand Up @@ -880,14 +882,6 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla

inline TValue& operator[](const TKey& key)
{
IteratorKV it = find(key);
if (it != iend())
{
return it.value();
}
// note: we can not use `emplace()` without calling `find()` function first
// because calling `emplace()` function might grow the hash table even if a key exists in the table (which will invalidate existing
// iterators)
std::pair<IteratorKV, bool> emplaceIt = emplace(key);
return emplaceIt.first.value();
}
Expand Down Expand Up @@ -933,7 +927,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
HashTable(const HashTable& other)
{
EXLBR_ASSERT(&other != this);
m_storage = allocateInline(TKeyInfo::getEmpty());
m_storage = constructInline(TKeyInfo::getEmpty());
create(other.m_numBuckets);
copyFrom(other);
}
Expand All @@ -946,7 +940,7 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
return *this;
}
destroyAndFreeMemory();
m_storage = allocateInline(TKeyInfo::getEmpty());
m_storage = constructInline(TKeyInfo::getEmpty());
create(other.m_numBuckets);
copyFrom(other);
return *this;
Expand Down Expand Up @@ -980,10 +974,24 @@ template <typename TKey, typename TValue, typename TKeyInfo = KeyInfo<TKey>> cla
uint32_t m_numBuckets; // 4
uint32_t m_numElements; // 4

// Returns true when `x` is a power of two (exactly one bit set); zero and negative values yield false.
// The positivity test runs first so that `x - 1` is never evaluated for the minimum value of a signed
// type, where the subtraction would overflow (undefined behavior, and an error in constant expressions).
template <typename INTEGRAL_TYPE> inline static constexpr bool isPow2(INTEGRAL_TYPE x) noexcept
{
    static_assert(std::is_integral<INTEGRAL_TYPE>::value, "isPow2 must be called on an integer type.");
    return (x > 0) && ((x & (x - 1)) == 0);
}

// We need this inline storage to keep `m_storage` not null all the time.
// This will save us from `empty()` check inside `find()` function implementation
typename std::aligned_storage<sizeof(TItem), alignof(TItem)>::type m_inlineStorage;
static_assert(sizeof(m_inlineStorage) == sizeof(TItem), "Incorrect sizeof");
static_assert(kNumInlineItems != 0, "Num inline items can't be zero!");
static_assert(isPow2(kNumInlineItems), "Num inline items should be power of two");
typename std::aligned_storage<sizeof(TItem) * kNumInlineItems, alignof(TItem)>::type m_inlineStorage;
static_assert(sizeof(m_inlineStorage) == (sizeof(TItem) * kNumInlineItems), "Incorrect sizeof");
};

// HashMap: convenience alias for a key/value HashTable that uses a single inline item
// and the default KeyInfo<TKey> traits.
template <typename TKey, typename TValue> using HashMap = HashTable<TKey, TValue, 1, KeyInfo<TKey>>;

// HashSet: convenience alias for a keys-only HashTable. TValue is std::nullptr_t, which the table
// treats as "no values" (see has_values). Uses a single inline item and the default KeyInfo<TKey> traits.
template <typename TKey> using HashSet = HashTable<TKey, std::nullptr_t, 1, KeyInfo<TKey>>;

} // namespace Excalibur
2 changes: 1 addition & 1 deletion ExcaliburHashTest02.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ TEST(SmFlatHashMap, CtorDtorCallCount)

{
// empty hash table
Excalibur::HashTable<ComplexStruct, int> ht;
Excalibur::HashTable<ComplexStruct, int, 4> ht;
EXPECT_TRUE(ht.empty());
EXPECT_EQ(ht.size(), 0u);
EXPECT_GE(ht.capacity(), 0u);
Expand Down
110 changes: 110 additions & 0 deletions ExcaliburHashTest05.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#include "ExcaliburHash.h"
#include "gtest/gtest.h"
#include <array>
#include <cstring>

namespace Excalibur
{
// KeyInfo specialization that lets std::string act as a hash table key in these tests.
// The default-constructed (empty) string is the "empty" marker and the one-character string
// holding char(1) is the tombstone; a key is reported as valid only when it is non-empty and
// its first character is not char(1).
template <> struct KeyInfo<std::string>
{
    static inline bool isValid(const std::string& key) noexcept
    {
        if (key.empty())
        {
            return false;
        }
        return key.front() != char(1);
    }

    static inline std::string getTombstone() noexcept
    {
        // one-character string: hopefully short enough for the small string optimization,
        // so that no allocation happens inside this noexcept function
        return std::string(1, char(1));
    }

    static inline std::string getEmpty() noexcept { return std::string{}; }

    static inline uint64_t hash(const std::string& key) noexcept
    {
        const std::hash<std::string> hasher{};
        return hasher(key);
    }

    static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return !(lhs != rhs); }
};
} // namespace Excalibur

// Exercises a table configured with 8 inline items: two entries are inserted and looked up,
// then 1000 more entries are added (far more than the inline slots can hold) and the first
// two entries are looked up again to make sure they are still reachable with their values.
TEST(SmFlatHashMap, InlineStorageTest01)
{
    // create a hash map with 8 inline items and insert two elements
    Excalibur::HashTable<std::string, std::string, 8> ht;

    EXPECT_GE(ht.capacity(), uint32_t(4));

    auto it1 = ht.emplace(std::string("hello1"), std::string("world1"));
    EXPECT_TRUE(it1.second);
    auto it2 = ht.emplace(std::string("hello2"), std::string("world2"));
    EXPECT_TRUE(it2.second);

    EXPECT_EQ(ht.size(), uint32_t(2));

    {
        auto _it1 = ht.find("hello1");
        ASSERT_NE(_it1, ht.end());
        const std::string& val1 = _it1->second;
        ASSERT_EQ(val1, "world1");

        auto _it2 = ht.find("hello2");
        ASSERT_NE(_it2, ht.end());
        const std::string& val2 = _it2->second;
        ASSERT_EQ(val2, "world2");
    }

    // add many more unique keys than the inline slots can hold;
    // every insertion must report a newly created element
    const int kNumExtraItems = 1000;
    for (int i = 0; i < kNumExtraItems; i++)
    {
        auto inserted = ht.emplace(std::to_string(i), "tmp");
        EXPECT_TRUE(inserted.second);
    }

    // the two original elements plus all the extra ones
    EXPECT_EQ(ht.size(), uint32_t(2 + kNumExtraItems));

    // the original elements must still be reachable with unchanged values
    {
        auto _it1 = ht.find("hello1");
        ASSERT_NE(_it1, ht.end());
        const std::string& val1 = _it1->second;
        ASSERT_EQ(val1, "world1");

        auto _it2 = ht.find("hello2");
        ASSERT_NE(_it2, ht.end());
        const std::string& val2 = _it2->second;
        ASSERT_EQ(val2, "world2");
    }
}


// Smoke test for the HashMap / HashSet convenience aliases.
TEST(SmFlatHashMap, AliasNameTest)
{
    // HashMap alias: insert two pairs, then look up both of them plus a missing key
    {
        Excalibur::HashMap<int, int> map;

        auto firstInsert = map.emplace(1, 2);
        EXPECT_TRUE(firstInsert.second);
        auto secondInsert = map.emplace(2, 3);
        EXPECT_TRUE(secondInsert.second);

        auto foundOne = map.find(1);
        ASSERT_NE(foundOne, map.end());

        auto foundTwo = map.find(2);
        ASSERT_NE(foundTwo, map.end());

        auto foundThree = map.find(3);
        ASSERT_EQ(foundThree, map.end());

        const int& valueOne = foundOne->second;
        const int& valueTwo = foundTwo->second;
        ASSERT_EQ(valueOne, 2);
        ASSERT_EQ(valueTwo, 3);
    }

    // HashSet alias: a repeated key must be rejected, distinct keys accepted
    {
        Excalibur::HashSet<int> set;

        auto firstInsert = set.emplace(1);
        EXPECT_TRUE(firstInsert.second);
        auto repeatedInsert = set.emplace(1);
        EXPECT_FALSE(repeatedInsert.second);
        auto secondInsert = set.emplace(2);
        EXPECT_TRUE(secondInsert.second);

        EXPECT_TRUE(set.has(1));
        EXPECT_TRUE(set.has(2));
        EXPECT_FALSE(set.has(3));
    }
}
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ It uses an open addressing hash table and manages removed items with a method ca
Engineered for ease of use, Excalibur Hash, in a vast majority of cases (99%), serves as a seamless, drop-in alternative to std::unordered_map. However, it's important to note that Excalibur Hash does not guarantee stable addressing.
So, if your project needs to hold direct pointers to the keys or values, Excalibur Hash might not work as you expect. This aside, its design and efficiency make it a great choice for applications where speed is crucial.

## Features

1. Extremely fast (see Performance section for details)
2. CPU cache friendly
3. Built-in configurable inline storage
4. Can either work as a map (key, value) or as a set (keys only)


## Performance

In this section, you can see a performance comparison against a few popular hash table implementations.
Expand Down