From 48bbc447335a0b3ec698e975d48ad49145780624 Mon Sep 17 00:00:00 2001
From: Krunal Bauskar <mysqlonarm@gmail.com>
Date: Wed, 15 Sep 2021 16:18:39 +0800
Subject: [PATCH] MDEV-26609 : Avoid deriving ELEMENTS_PER_LATCH from cacheline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* The buffer pool has latches that protect access to pages.

* there is a latch per N pages.
  (check page_hash_table for more details)

* N is calculated based on the cacheline size.

* For example, if the cacheline size is
  : 64 bytes, then 7 page pointers + 1 latch can be hosted on the same cacheline
  : 128 bytes, then 15 page pointers + 1 latch can be hosted on the same cacheline

* ARM generally has a wider cacheline, so on ARM 1 latch is used
  to access 15 pages, whereas on x86 1 latch is used to access 7 pages.
  Naturally, the contention is higher in the ARM case.

* This patch helps relax this contention by limiting the elements
  per cacheline to 7 (+ 1 latch slot).
  For a wider cacheline (say 128 bytes), the remaining 8 slots are kept empty.
  This ensures that no 2 latches share the same cacheline, which avoids
  latch-level contention.

Based on a suggestion from Marko, the same logic is now extended to
lock_sys_t::hash_table.
---
 storage/innobase/include/buf0buf.h   | 16 +++++++++++++---
 storage/innobase/include/lock0lock.h | 13 ++++++++++---
 2 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 2ad731e7b94a9..3f922708ef18a 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1726,10 +1726,15 @@ class buf_pool_t
   /** Hash table with singly-linked overflow lists. @see hash_table_t */
   struct page_hash_table
   {
+    static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "less than 64 bytes");
+    static_assert(!(CPU_LEVEL1_DCACHE_LINESIZE & 63),
+      "not a multiple of 64 bytes");
+
     /** Number of array[] elements per page_hash_latch.
     Must be one less than a power of 2. */
-    static constexpr size_t ELEMENTS_PER_LATCH= CPU_LEVEL1_DCACHE_LINESIZE /
-      sizeof(void*) - 1;
+    static constexpr size_t ELEMENTS_PER_LATCH= 64 / sizeof(void*) - 1;
+    static constexpr size_t EMPTY_SLOTS_PER_LATCH=
+      ((CPU_LEVEL1_DCACHE_LINESIZE / 64) - 1) * (64 / sizeof(void*));
 
     /** number of payload elements in array[] */
     Atomic_relaxed<ulint> n_cells;
@@ -1746,7 +1751,12 @@ class buf_pool_t
     /** @return the index of an array element */
     ulint calc_hash(ulint fold) const { return calc_hash(fold, n_cells); }
     /** @return raw array index converted to padded index */
-    static ulint pad(ulint h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }
+    static ulint pad(ulint h)
+    {
+      ulint latches= h / ELEMENTS_PER_LATCH; // complete latch groups before h
+      ulint empty_slots= latches * EMPTY_SLOTS_PER_LATCH; // unused padding
+      return 1 + latches + empty_slots + h; // leading 1: own group's latch
+    }
   private:
     /** @return the hash value before any ELEMENTS_PER_LATCH padding */
     static ulint hash(ulint fold, ulint n) { return ut_hash_ulint(fold, n); }
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 859441afcc0ce..c2fef3baaacaa 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -609,8 +609,9 @@ class lock_sys_t
 
     /** Number of array[] elements per hash_latch.
     Must be LATCH less than a power of 2. */
-    static constexpr size_t ELEMENTS_PER_LATCH= CPU_LEVEL1_DCACHE_LINESIZE /
-      sizeof(void*) - LATCH;
+    static constexpr size_t ELEMENTS_PER_LATCH= (64 / sizeof(void*)) - LATCH;
+    static constexpr size_t EMPTY_SLOTS_PER_LATCH=
+      ((CPU_LEVEL1_DCACHE_LINESIZE / 64) - 1) * (64 / sizeof(void*));
 
     /** number of payload elements in array[]. Protected by lock_sys.latch. */
     ulint n_cells;
@@ -632,9 +633,15 @@ class lock_sys_t
 
     /** @return the index of an array element */
     inline ulint calc_hash(ulint fold) const;
+
     /** @return raw array index converted to padded index */
     static ulint pad(ulint h)
-    { return LATCH + LATCH * (h / ELEMENTS_PER_LATCH) + h; }
+    {
+      ulint latches= LATCH * (h / ELEMENTS_PER_LATCH); // latch slots before h
+      ulint empty_slots= (h / ELEMENTS_PER_LATCH) * EMPTY_SLOTS_PER_LATCH;
+      return LATCH + latches + empty_slots + h; // leading LATCH: own group's
+    }
+
     /** Get a latch. */
     static hash_latch *latch(hash_cell_t *cell)
     {