Skip to content

Commit

Permalink
tombstone counter, rehash, faster hash for integer types
Browse files Browse the repository at this point in the history
  • Loading branch information
SergeyMakeev committed Mar 12, 2024
1 parent b99119a commit 097c433
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 32 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ set(TEST_SOURCES
ExcaliburHashTest03.cpp
ExcaliburHashTest04.cpp
ExcaliburHashTest05.cpp
ExcaliburHashTest06.cpp
)

set (TEST_EXE_NAME ${PROJ_NAME})
Expand Down
78 changes: 52 additions & 26 deletions ExcaliburHash/ExcaliburHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#define EXLBR_ALLOC(sizeInBytes, alignment) _mm_malloc(sizeInBytes, alignment)
#define EXLBR_FREE(ptr) _mm_free(ptr)
#else
// Posix
// Posix
#include <stdlib.h>
#define EXLBR_ALLOC(sizeInBytes, alignment) aligned_alloc(alignment, sizeInBytes)
#define EXLBR_FREE(ptr) free(ptr)
Expand Down Expand Up @@ -198,10 +198,12 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
m_storage = other.m_storage;
m_numBuckets = other.m_numBuckets;
m_numElements = other.m_numElements;
m_numTombstones = other.m_numTombstones;
other.m_storage = nullptr;
// don't need to zero rest of the members because dtor doesn't use them
// other.m_numBuckets = 0;
// other.m_numElements = 0;
// other.m_numTombstones = 0;
}
else
{
Expand All @@ -212,10 +214,12 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
m_storage = inlineItems;
m_numBuckets = other.m_numBuckets;
m_numElements = other.m_numElements;
m_numTombstones = other.m_numTombstones;
// note: other's inline items will be destroyed automatically when its dtor is called
// other.m_storage = nullptr;
// other.m_numBuckets = 0;
// other.m_numElements = 0;
// other.m_numTombstones = 0;
}
}

Expand Down Expand Up @@ -321,6 +325,7 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename

m_numBuckets = numBuckets;
m_numElements = 0;
m_numTombstones = 0;

TItem* EXLBR_RESTRICT item = m_storage;
TItem* const endItem = item + numBuckets;
Expand Down Expand Up @@ -586,6 +591,7 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
//: m_storage(nullptr)
: m_numBuckets(kNumInlineItems)
, m_numElements(0)
, m_numTombstones(0)
{
m_storage = constructInline(TKeyInfo::getEmpty());
}
Expand Down Expand Up @@ -624,31 +630,38 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
}
// TODO: shrink if needed?
m_numElements = 0;
m_numTombstones = 0;
}

private:
template <typename TK, class... Args>
inline std::pair<IteratorKV, bool> emplaceToExisting(size_t numBuckets, TK&& key, Args&&... args)
template <typename TK, class... Args> inline std::pair<IteratorKV, bool> emplaceToExisting(size_t numBuckets, TK&& key, Args&&... args)
{
// numBuckets has to be power-of-two
EXLBR_ASSERT(numBuckets > 0);
EXLBR_ASSERT((numBuckets & (numBuckets - 1)) == 0);
EXLBR_ASSERT(isPow2(numBuckets));
const size_t hashValue = TKeyInfo::hash(key);
const size_t bucketIndex = hashValue & (numBuckets - 1);
TItem* const firstItem = m_storage;
TItem* const endItem = firstItem + numBuckets;
TItem* EXLBR_RESTRICT currentItem = firstItem + bucketIndex;
TItem* EXLBR_RESTRICT insertItem = nullptr;
TItem* EXLBR_RESTRICT foundTombstoneItem = nullptr;

while (true)
{
// key already exists
if (currentItem->isEqual(key))
{
return std::make_pair(IteratorKV(this, currentItem), false);
}

// if we found an empty bucket, the key doesn't exist in the set.
if (currentItem->isEmpty())
{
insertItem = ((insertItem == nullptr) ? currentItem : insertItem);
TItem* EXLBR_RESTRICT insertItem = ((foundTombstoneItem == nullptr) ? currentItem : foundTombstoneItem);

if (foundTombstoneItem)
{
m_numTombstones--;
}

// move key
*insertItem->key() = std::move(key);
Expand All @@ -660,10 +673,13 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
m_numElements++;
return std::make_pair(IteratorKV(this, insertItem), true);
}
if (currentItem->isTombstone() && insertItem == nullptr)

// if we found a tombstone, remember it. If the key does not exist in the table, we prefer to reuse the tombstone slot to minimize probing.
if (currentItem->isTombstone() && foundTombstoneItem == nullptr)
{
insertItem = currentItem;
foundTombstoneItem = currentItem;
}

currentItem++;
currentItem = (currentItem == endItem) ? firstItem : currentItem;
}
Expand Down Expand Up @@ -746,13 +762,13 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
EXLBR_ASSERT(!TKeyInfo::isEqual(TKeyInfo::getEmpty(), TKeyInfo::getTombstone()));
uint32_t numBuckets = m_numBuckets;

// numBucketsThreshold = (numBuckets * 3/4) (but implemented using bit shifts)
const uint32_t numBucketsThreshold = shr(numBuckets, 1u) + shr(numBuckets, 2u);
if (EXLBR_LIKELY(m_numElements <= numBucketsThreshold))
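// numBucketsThreshold = (numBuckets / 2) + 1 (division implemented using a bit shift);
// with the strict '<' below, the existing storage is used while m_numElements <= numBuckets / 2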
const uint32_t numBucketsThreshold = shr(numBuckets, 1u) + 1;
if (EXLBR_LIKELY(m_numElements < numBucketsThreshold))
{
return emplaceToExisting(numBuckets, key, args...);
}
return emplaceReallocate(numBuckets * 2, key, args...);
return emplaceReallocate(std::max(numBuckets * 2, 64u), key, args...);
}

[[nodiscard]] inline ConstIteratorKV find(const TKey& key) const noexcept
Expand Down Expand Up @@ -785,19 +801,17 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
destruct(itemValue);
}

// hash table now is empty. convert all tombstones to empty keys
TKey* itemKey = const_cast<TKey*>(it.getKey());
if (m_numElements == 0)
{
for (; item != endItem; item++)
{
*item->key() = TKeyInfo::getEmpty();
}
// hash table is now empty. it is safe to write empty value instead of tombstone
*itemKey = TKeyInfo::getEmpty();
return endItem;
}

// overwrite key with tombstone key
TKey* itemKey = const_cast<TKey*>(it.getKey());
*itemKey = TKeyInfo::getTombstone();
m_numTombstones++;
return IteratorBase::getNextValidItem(it.m_item, endItem);
}

Expand All @@ -820,14 +834,10 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
return (it != iend());
}

inline bool reserve(uint32_t numBucketsNew)
private:
void resize(uint32_t numBucketsNew)
{
if (numBucketsNew == 0 || numBucketsNew < capacity())
{
return false;
}
numBucketsNew = nextPow2(numBucketsNew);

EXLBR_ASSERT(isPow2(numBucketsNew));
const uint32_t numBuckets = m_numBuckets;
TItem* storage = m_storage;
TItem* EXLBR_RESTRICT item = storage;
Expand All @@ -842,10 +852,23 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
{
EXLBR_FREE(storage);
}
}

public:
// Re-runs resize() with the current bucket count (m_numBuckets), i.e. the
// table is rebuilt without changing its capacity.
inline void rehash()
{
    resize(m_numBuckets);
}

// Resizes the table to hold at least numBucketsNew buckets.
// The request is rounded up with nextPow2() before being handed to resize().
// Returns false (and does nothing) when the request is zero or smaller than
// the current capacity(); returns true after resizing otherwise.
inline bool reserve(uint32_t numBucketsNew)
{
    const bool isAcceptable = (numBucketsNew != 0) && (numBucketsNew >= capacity());
    if (!isAcceptable)
    {
        return false;
    }

    const uint32_t numBucketsPow2 = nextPow2(numBucketsNew);
    resize(numBucketsPow2);
    return true;
}

// Returns the current value of the tombstone counter (m_numTombstones).
[[nodiscard]] inline uint32_t getNumTombstones() const noexcept
{
    return m_numTombstones;
}
// Returns the number of elements currently stored (m_numElements).
[[nodiscard]] inline uint32_t size() const noexcept
{
    return m_numElements;
}
// Returns the current number of buckets (m_numBuckets).
[[nodiscard]] inline uint32_t capacity() const noexcept
{
    return m_numBuckets;
}
// Returns true when no elements are stored (m_numElements is zero).
[[nodiscard]] inline bool empty() const noexcept
{
    return m_numElements == 0;
}
Expand Down Expand Up @@ -945,6 +968,9 @@ template <typename TKey, typename TValue, unsigned kNumInlineItems = 1, typename
TItem* m_storage; // 8
uint32_t m_numBuckets; // 4
uint32_t m_numElements; // 4
uint32_t m_numTombstones; // 4
//padding 4


template <typename INTEGRAL_TYPE> inline static constexpr bool isPow2(INTEGRAL_TYPE x) noexcept
{
Expand Down
28 changes: 24 additions & 4 deletions ExcaliburHash/ExcaliburKeyInfo.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#pragma once
#ifndef EXLBR_IGNORE_BUILTIN_KEYINFO

#include <string>

#include "wyhash.h"

#define EXLBR_USE_SIMPLE_HASH (1)

namespace Excalibur
{

Expand All @@ -22,16 +25,24 @@ template <> struct KeyInfo<int32_t>
static inline bool isValid(const int32_t& key) noexcept { return key < INT32_C(0x7ffffffe); }
static inline int32_t getTombstone() noexcept { return INT32_C(0x7fffffff); }
static inline int32_t getEmpty() noexcept { return INT32_C(0x7ffffffe); }
#if EXLBR_USE_SIMPLE_HASH
static inline size_t hash(const int32_t& key) noexcept { return key * 37U; }
#else
static inline size_t hash(const int32_t& key) noexcept { return Excalibur::wyhash::hash(key); }
#endif
static inline bool isEqual(const int32_t& lhs, const int32_t& rhs) noexcept { return lhs == rhs; }
};

// KeyInfo specialization for uint32_t keys.
// The two largest values are reserved as sentinels: 0xffffffff is the "empty"
// key and 0xfffffffe is the "tombstone" key; every smaller value is a valid key.
template <> struct KeyInfo<uint32_t>
{
    // true for every key below the two reserved sentinel values
    static inline bool isValid(const uint32_t& key) noexcept { return key < UINT32_C(0xfffffffe); }
    // sentinel key value reported as the tombstone marker
    static inline uint32_t getTombstone() noexcept { return UINT32_C(0xfffffffe); }
    // sentinel key value reported as the empty marker
    static inline uint32_t getEmpty() noexcept { return UINT32_C(0xffffffff); }
#if EXLBR_USE_SIMPLE_HASH
    // cheap multiplicative hash (key * 37)
    static inline size_t hash(const uint32_t& key) noexcept { return key * 37U; }
#else
    // full-strength hash provided by the bundled wyhash implementation
    static inline size_t hash(const uint32_t& key) noexcept { return Excalibur::wyhash::hash(key); }
#endif
    // plain value comparison
    static inline bool isEqual(const uint32_t& lhs, const uint32_t& rhs) noexcept { return lhs == rhs; }
};

Expand All @@ -40,7 +51,11 @@ template <> struct KeyInfo<int64_t>
static inline bool isValid(const int64_t& key) noexcept { return key < INT64_C(0x7ffffffffffffffe); }
static inline int64_t getTombstone() noexcept { return INT64_C(0x7fffffffffffffff); }
static inline int64_t getEmpty() noexcept { return INT64_C(0x7ffffffffffffffe); }
#if EXLBR_USE_SIMPLE_HASH
static inline size_t hash(const int64_t& key) noexcept { return key * 37ULL; }
#else
static inline size_t hash(const int64_t& key) noexcept { return Excalibur::wyhash::hash(key); }
#endif
static inline bool isEqual(const int64_t& lhs, const int64_t& rhs) noexcept { return lhs == rhs; }
};

Expand All @@ -49,11 +64,15 @@ template <> struct KeyInfo<uint64_t>
static inline bool isValid(const uint64_t& key) noexcept { return key < UINT64_C(0xfffffffffffffffe); }
static inline uint64_t getTombstone() noexcept { return UINT64_C(0xfffffffffffffffe); }
static inline uint64_t getEmpty() noexcept { return UINT64_C(0xffffffffffffffff); }
#if EXLBR_USE_SIMPLE_HASH
static inline size_t hash(const uint64_t& key) noexcept { return key * 37ULL; }
#else
static inline size_t hash(const uint64_t& key) noexcept { return Excalibur::wyhash::hash(key); }
#endif
static inline bool isEqual(const uint64_t& lhs, const uint64_t& rhs) noexcept { return lhs == rhs; }
};

template <> struct KeyInfo<std::string>
template <> struct KeyInfo<std::string>
{
static inline bool isValid(const std::string& key) noexcept { return !key.empty() && key.data()[0] != char(1); }
static inline std::string getTombstone() noexcept
Expand All @@ -66,5 +85,6 @@ template <> struct KeyInfo<uint64_t>
static inline bool isEqual(const std::string& lhs, const std::string& rhs) noexcept { return lhs == rhs; }
};


} // namespace Excalibur

#endif
5 changes: 3 additions & 2 deletions ExcaliburHash/wyhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,15 @@ inline uint64_t _hash64(uint64_t v)

inline uint32_t _hash32(uint32_t v)
{
// multiplier from https://arxiv.org/abs/2001.05304
#if EXLBR_VISUAL_STUDIO
{
uint64_t lh = __emulu(v, UINT32_C(0x9e3779b1));
uint64_t lh = __emulu(v, UINT32_C(0xE817FB2D));
return (uint32_t)(lh >> 32U) ^ (uint32_t)(lh);
}
#elif defined(EXLBR_CLANG) || defined(EXLBR_GCC)
{
uint64_t lh = uint64_t(v) * uint64_t(0x9e3779b1);
uint64_t lh = uint64_t(v) * uint64_t(0xE817FB2D);
return (uint32_t)(lh >> 32U) ^ (uint32_t)(lh);
}
#else
Expand Down
2 changes: 2 additions & 0 deletions ExcaliburHashTest01.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ TEST(SmFlatHashMap, SimplestTest)
EXPECT_EQ(it1.first, it2);
EXPECT_EQ(it2.key(), 1);
EXPECT_EQ(it2.value(), 2);
auto it3 = ht.emplace(3, 4);
EXPECT_TRUE(it3.second);
}

TEST(SmFlatHashMap, EmptyValuesTest)
Expand Down
46 changes: 46 additions & 0 deletions ExcaliburHashTest06.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include "ExcaliburHash.h"
#include "gtest/gtest.h"
#include <array>
#include <cstring>

// Interleaves insertions and erasures so that tombstones are left behind,
// then checks that rehash() brings the tombstone counter back to zero.
TEST(SmFlatHashMap, Rehash)
{
    Excalibur::HashTable<int, int> ht;

    // Inserts key -> -key and leaves it in the table.
    // Arguments are handed over as temporaries, just like literal arguments.
    const auto insertAndKeep = [&ht](int key) { ht.emplace(static_cast<int>(key), -key); };

    // Inserts key -> -key and erases it again right away.
    const auto insertAndErase = [&ht](int key)
    {
        ht.emplace(static_cast<int>(key), -key);
        ht.erase(static_cast<int>(key));
    };

    insertAndErase(1);

    insertAndKeep(2);
    insertAndErase(5);

    insertAndErase(7);

    insertAndKeep(8);
    insertAndErase(9);

    // keys 10..14 are each inserted and immediately removed, in ascending order
    for (int key = 10; key <= 14; ++key)
    {
        insertAndErase(key);
    }

    insertAndKeep(15);

    // at least one erased slot must still be counted as a tombstone
    EXPECT_GE(ht.getNumTombstones(), uint32_t(1));

    ht.rehash();

    // rehash is expected to leave no tombstones behind
    EXPECT_EQ(ht.getNumTombstones(), uint32_t(0));
}

0 comments on commit 097c433

Please sign in to comment.