From 8fd79f07d5dee2c7f0aaa3304f25c5b3a8bd2814 Mon Sep 17 00:00:00 2001
From: Patrick Pelissier <Patrick.Pelissier@gmail.com>
Date: Tue, 7 May 2024 10:05:49 +0200
Subject: [PATCH] Mark the functions to be optimized and those that shall not be.

---
 m-dict.h | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/m-dict.h b/m-dict.h
index ed665e2a..b371d20d 100644
--- a/m-dict.h
+++ b/m-dict.h
@@ -1513,7 +1513,7 @@ enum m_d1ct_oa_element_e {
     dict->data = NULL;                                                        \
   }                                                                           \
                                                                               \
-  M_INLINE value_type *                                                       \
+  M_INLINE value_type * M_ATTR_HOT_FUNCTION                                   \
   M_F(name, _get)(const dict_t dict, key_type const key)                      \
   {                                                                           \
     M_D1CT_OA_CONTRACT(dict);                                                 \
@@ -1555,7 +1555,7 @@ enum m_d1ct_oa_element_e {
   }                                                                           \
   )                                                                           \
                                                                               \
-  M_INLINE void                                                               \
+  M_INLINE void M_ATTR_COLD_FUNCTION                                          \
   M_C3(m_d1ct_,name,_resize_up)(dict_t h, size_t newSize, bool updateLimit)   \
   {                                                                           \
     size_t oldSize = h->mask+1;                                               \
@@ -1571,17 +1571,29 @@ enum m_d1ct_oa_element_e {
       }                                                                       \
                                                                               \
       /* First mark the extended space as empty */                            \
-      for(size_t i = oldSize ; i < newSize; i++)                              \
-        M_CALL_OOR_SET(key_oplist, data[i].key, M_D1CT_OA_EMPTY);             \
+      M_ASSUME( oldSize + 8 < newSize);                                       \
+      for(size_t i = oldSize ; i < newSize; i+=8) {                           \
+        /* Unroll loop as compiler doesn't do it by itself */                 \
+        M_CALL_OOR_SET(key_oplist, data[i+0].key, M_D1CT_OA_EMPTY);           \
+        M_CALL_OOR_SET(key_oplist, data[i+1].key, M_D1CT_OA_EMPTY);           \
+        M_CALL_OOR_SET(key_oplist, data[i+2].key, M_D1CT_OA_EMPTY);           \
+        M_CALL_OOR_SET(key_oplist, data[i+3].key, M_D1CT_OA_EMPTY);           \
+        M_CALL_OOR_SET(key_oplist, data[i+4].key, M_D1CT_OA_EMPTY);           \
+        M_CALL_OOR_SET(key_oplist, data[i+5].key, M_D1CT_OA_EMPTY);           \
+        M_CALL_OOR_SET(key_oplist, data[i+6].key, M_D1CT_OA_EMPTY);           \
+        M_CALL_OOR_SET(key_oplist, data[i+7].key, M_D1CT_OA_EMPTY);           \
+      }                                                                       \
     }                                                                         \
                                                                               \
     /* Then let's rehash all the entries in their **exact** position.         \
        If we can't, let's put them in the 'tmp' array.                        \
        It has been measured that the size of this 'tmp' array is              \
        around 6% of the size of updated dictionary.                           \
-       NOTE: This should be much cache friendly than typical hash code  */    \
+       NOTE: This should be much more cache friendly than typical hash code.  \
+       Reserve a little space in 'tmp' to avoid reallocation if possible */   \
     M_F(name, _array_pair_ct) tmp;                                            \
     M_F(name, _array_pair_init)(tmp);                                         \
+    M_F(name, _array_pair_reserve)(tmp, oldSize >> 2);                        \
     const size_t mask = (newSize -1);                                         \
                                                                               \
     for(size_t i = 0 ; i < oldSize; i++) {                                    \
@@ -1611,13 +1623,12 @@ enum m_d1ct_oa_element_e {
     while (M_F(name, _array_pair_size)(tmp) > 0) {                            \
       M_F(name, _pair_ct) const *item = M_F(name, _array_pair_back)(tmp);     \
       size_t p = M_CALL_HASH(key_oplist, item->key) & mask;                   \
+      size_t s = 1;                                                           \
       /* NOTE: since the first pass, the bucket might be free now */          \
-      if (!M_CALL_OOR_EQUAL(key_oplist, data[p].key, M_D1CT_OA_EMPTY)) {      \
-        size_t s = 1;                                                         \
-        do {                                                                  \
-          p = (p + M_D1CT_OA_PROBING(s)) & mask;                              \
-          M_ASSERT (s <= h->mask);                                            \
-        } while (!M_CALL_OOR_EQUAL(key_oplist, data[p].key, M_D1CT_OA_EMPTY) ); \
+      /* Likely cache miss */                                                 \
+      while (!M_CALL_OOR_EQUAL(key_oplist, data[p].key, M_D1CT_OA_EMPTY)) {   \
+        p = (p + M_D1CT_OA_PROBING(s)) & mask;                                \
+        M_ASSERT (s <= h->mask);                                              \
       }                                                                       \
       M_F(name, _array_pair_pop_move)(&data[p], tmp);                         \
     }                                                                         \
@@ -1633,7 +1644,7 @@ enum m_d1ct_oa_element_e {
     M_D1CT_OA_CONTRACT(h);                                                    \
   }                                                                           \
                                                                               \
-  M_INLINE void                                                               \
+  M_INLINE void M_ATTR_HOT_FUNCTION                                           \
   M_IF(isSet)(M_F(name, _push), M_F(name,_set_at))                            \
        (dict_t dict, key_type const key                                       \
         M_IF(isSet)(, M_DEFERRED_COMMA value_type const value) )              \
@@ -1693,7 +1704,7 @@ enum m_d1ct_oa_element_e {
     M_D1CT_OA_CONTRACT(dict);                                                 \
   }                                                                           \
                                                                               \
-  M_INLINE value_type *                                                       \
+  M_INLINE value_type * M_ATTR_HOT_FUNCTION                                   \
   M_F(name,_safe_get)(dict_t dict, key_type const key)                        \
   {                                                                           \
     M_D1CT_OA_CONTRACT(dict);                                                 \
@@ -1755,7 +1766,7 @@ enum m_d1ct_oa_element_e {
     return M_F(name,_safe_get)(dict, key);                                    \
   }                                                                           \
                                                                               \
-  M_INLINE void                                                               \
+  M_INLINE void M_ATTR_COLD_FUNCTION                                          \
   M_C3(m_d1ct_,name,_resize_down)(dict_t h, size_t newSize)                   \
   {                                                                           \
     size_t oldSize = h->mask+1;                                               \