From 8fd79f07d5dee2c7f0aaa3304f25c5b3a8bd2814 Mon Sep 17 00:00:00 2001 From: Patrick Pelissier Date: Tue, 7 May 2024 10:05:49 +0200 Subject: [PATCH] Mark the functions to be optimized those that shall not be. --- m-dict.h | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/m-dict.h b/m-dict.h index ed665e2a..b371d20d 100644 --- a/m-dict.h +++ b/m-dict.h @@ -1513,7 +1513,7 @@ enum m_d1ct_oa_element_e { dict->data = NULL; \ } \ \ - M_INLINE value_type * \ + M_INLINE value_type * M_ATTR_HOT_FUNCTION \ M_F(name, _get)(const dict_t dict, key_type const key) \ { \ M_D1CT_OA_CONTRACT(dict); \ @@ -1555,7 +1555,7 @@ enum m_d1ct_oa_element_e { } \ ) \ \ - M_INLINE void \ + M_INLINE void M_ATTR_COLD_FUNCTION \ M_C3(m_d1ct_,name,_resize_up)(dict_t h, size_t newSize, bool updateLimit) \ { \ size_t oldSize = h->mask+1; \ @@ -1571,17 +1571,29 @@ enum m_d1ct_oa_element_e { } \ \ /* First mark the extended space as empty */ \ - for(size_t i = oldSize ; i < newSize; i++) \ - M_CALL_OOR_SET(key_oplist, data[i].key, M_D1CT_OA_EMPTY); \ + M_ASSUME( oldSize + 8 < newSize); \ + for(size_t i = oldSize ; i < newSize; i+=8) { \ + /* Unroll loop as compiler doesn't do it by itself */ \ + M_CALL_OOR_SET(key_oplist, data[i+0].key, M_D1CT_OA_EMPTY); \ + M_CALL_OOR_SET(key_oplist, data[i+1].key, M_D1CT_OA_EMPTY); \ + M_CALL_OOR_SET(key_oplist, data[i+2].key, M_D1CT_OA_EMPTY); \ + M_CALL_OOR_SET(key_oplist, data[i+3].key, M_D1CT_OA_EMPTY); \ + M_CALL_OOR_SET(key_oplist, data[i+4].key, M_D1CT_OA_EMPTY); \ + M_CALL_OOR_SET(key_oplist, data[i+5].key, M_D1CT_OA_EMPTY); \ + M_CALL_OOR_SET(key_oplist, data[i+6].key, M_D1CT_OA_EMPTY); \ + M_CALL_OOR_SET(key_oplist, data[i+7].key, M_D1CT_OA_EMPTY); \ + } \ } \ \ /* Then let's rehash all the entries in their **exact** position. \ If we can't, let's put them in the 'tmp' array. \ It has been measured that the size of this 'tmp' array is \ around 6% of the size of updated dictionary. \ - NOTE: This should be much cache friendly than typical hash code */ \ + NOTE: This should be much cache friendly than typical hash code \ + Reserve a little bit of array to avoid reallocation if possible */ \ M_F(name, _array_pair_ct) tmp; \ M_F(name, _array_pair_init)(tmp); \ + M_F(name, _array_pair_reserve)(tmp, oldSize >> 2); \ const size_t mask = (newSize -1); \ \ for(size_t i = 0 ; i < oldSize; i++) { \ @@ -1611,13 +1623,12 @@ enum m_d1ct_oa_element_e { while (M_F(name, _array_pair_size)(tmp) > 0) { \ M_F(name, _pair_ct) const *item = M_F(name, _array_pair_back)(tmp); \ size_t p = M_CALL_HASH(key_oplist, item->key) & mask; \ + size_t s = 1; \ /* NOTE: since the first pass, the bucket might be free now */ \ - if (!M_CALL_OOR_EQUAL(key_oplist, data[p].key, M_D1CT_OA_EMPTY)) { \ - size_t s = 1; \ - do { \ - p = (p + M_D1CT_OA_PROBING(s)) & mask; \ - M_ASSERT (s <= h->mask); \ - } while (!M_CALL_OOR_EQUAL(key_oplist, data[p].key, M_D1CT_OA_EMPTY) ); \ + /* Likely cache miss */ \ + while (!M_CALL_OOR_EQUAL(key_oplist, data[p].key, M_D1CT_OA_EMPTY)) { \ + p = (p + M_D1CT_OA_PROBING(s)) & mask; \ + M_ASSERT (s <= h->mask); \ } \ M_F(name, _array_pair_pop_move)(&data[p], tmp); \ } \ @@ -1633,7 +1644,7 @@ enum m_d1ct_oa_element_e { M_D1CT_OA_CONTRACT(h); \ } \ \ - M_INLINE void \ + M_INLINE void M_ATTR_HOT_FUNCTION \ M_IF(isSet)(M_F(name, _push), M_F(name,_set_at)) \ (dict_t dict, key_type const key \ M_IF(isSet)(, M_DEFERRED_COMMA value_type const value) ) \ @@ -1693,7 +1704,7 @@ enum m_d1ct_oa_element_e { M_D1CT_OA_CONTRACT(dict); \ } \ \ - M_INLINE value_type * \ + M_INLINE value_type * M_ATTR_HOT_FUNCTION \ M_F(name,_safe_get)(dict_t dict, key_type const key) \ { \ M_D1CT_OA_CONTRACT(dict); \ @@ -1755,7 +1766,7 @@ enum m_d1ct_oa_element_e { return M_F(name,_safe_get)(dict, key); \ } \ \ - M_INLINE void \ + M_INLINE void M_ATTR_COLD_FUNCTION \ M_C3(m_d1ct_,name,_resize_down)(dict_t h, size_t newSize) \ { \ size_t oldSize = h->mask+1; \